From 15a73c6a791713f7b85723d692f4188dd108cbe6 Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Thu, 19 Jun 2025 09:23:22 -0400 Subject: [PATCH 1/5] [MC] Use a variant to hold MCCFIInstruction state (NFC) AMDGPU requires more complex CFI rules, normally these would be expressed with .cfi_escape, however this would make the CFI unreadable and makes it difficult to update registers in CFI instructions (also something AMDGPU requires). --- llvm/include/llvm/MC/MCDwarf.h | 137 +++++++++++++++------------------ 1 file changed, 60 insertions(+), 77 deletions(-) diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 9944a9a92ab1f..640d2eeb68d61 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -29,6 +29,7 @@ #include #include #include +#include #include namespace llvm { @@ -531,67 +532,47 @@ class MCCFIInstruction { OpValOffset, }; + // Held in ExtraFields for most common OpTypes, exceptions follow. + struct CommonFields { + unsigned Register = std::numeric_limits::max(); + int64_t Offset = 0; + unsigned Register2 = std::numeric_limits::max(); + unsigned AddressSpace = 0; + }; + // Held in ExtraFields when OpEscape. + struct EscapeFields { + std::vector Values; + std::string Comment; + }; + // Held in ExtraFields when OpLabel. 
+ struct LabelFields { + MCSymbol *CfiLabel = nullptr; + }; + private: MCSymbol *Label; - union { - struct { - unsigned Register; - int64_t Offset; - } RI; - struct { - unsigned Register; - int64_t Offset; - unsigned AddressSpace; - } RIA; - struct { - unsigned Register; - unsigned Register2; - } RR; - MCSymbol *CfiLabel; - } U; + std::variant ExtraFields; OpType Operation; SMLoc Loc; - std::vector Values; - std::string Comment; - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, SMLoc Loc, - StringRef V = "", StringRef Comment = "") - : Label(L), Operation(Op), Loc(Loc), Values(V.begin(), V.end()), - Comment(Comment) { - assert(Op != OpRegister && Op != OpLLVMDefAspaceCfa); - U.RI = {R, O}; - } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2, SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpRegister); - U.RR = {R1, R2}; - } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, unsigned AS, - SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpLLVMDefAspaceCfa); - U.RIA = {R, O, AS}; - } - - MCCFIInstruction(OpType Op, MCSymbol *L, MCSymbol *CfiLabel, SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpLabel); - U.CfiLabel = CfiLabel; - } + template + MCCFIInstruction(OpType Op, MCSymbol *L, FieldsType &&EF, SMLoc Loc) + : Label(L), ExtraFields(std::forward(EF)), Operation(Op), + Loc(Loc) {} public: /// .cfi_def_cfa defines a rule for computing CFA as: take address from /// Register and add Offset to it. static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfa, L, Register, Offset, Loc); + return {OpDefCfa, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_def_cfa_register modifies a rule for computing CFA. From now /// on Register will be used instead of the old one. Offset remains the same. 
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfaRegister, L, Register, INT64_C(0), Loc); + return {OpDefCfaRegister, L, CommonFields{Register}, Loc}; } /// .cfi_def_cfa_offset modifies a rule for computing CFA. Register @@ -599,7 +580,7 @@ class MCCFIInstruction { /// that will be added to a defined register to the compute CFA address. static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfaOffset, L, 0, Offset, Loc); + return {OpDefCfaOffset, L, CommonFields{0, Offset}, Loc}; } /// .cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but @@ -607,7 +588,7 @@ class MCCFIInstruction { /// offset. static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc = {}) { - return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, Loc); + return {OpAdjustCfaOffset, L, CommonFields{0, Adjustment}, Loc}; } // FIXME: Update the remaining docs to use the new proposal wording. @@ -618,15 +599,15 @@ class MCCFIInstruction { int64_t Offset, unsigned AddressSpace, SMLoc Loc) { - return MCCFIInstruction(OpLLVMDefAspaceCfa, L, Register, Offset, - AddressSpace, Loc); + return {OpLLVMDefAspaceCfa, L, + CommonFields{Register, Offset, 0, AddressSpace}, Loc}; } /// .cfi_offset Previous value of Register is saved at offset Offset /// from CFA. static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpOffset, L, Register, Offset, Loc); + return {OpOffset, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_rel_offset Previous value of Register is saved at offset @@ -634,30 +615,30 @@ class MCCFIInstruction { /// using the known displacement of the CFA register from the CFA. 
static MCCFIInstruction createRelOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpRelOffset, L, Register, Offset, Loc); + return {OpRelOffset, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_register Previous value of Register1 is saved in /// register Register2. static MCCFIInstruction createRegister(MCSymbol *L, unsigned Register1, unsigned Register2, SMLoc Loc = {}) { - return MCCFIInstruction(OpRegister, L, Register1, Register2, Loc); + return {OpRegister, L, CommonFields{Register1, 0, Register2}, Loc}; } /// .cfi_window_save SPARC register window is saved. static MCCFIInstruction createWindowSave(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpWindowSave, L, 0, INT64_C(0), Loc); + return {OpWindowSave, L, CommonFields{}, Loc}; } /// .cfi_negate_ra_state AArch64 negate RA state. static MCCFIInstruction createNegateRAState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc); + return {OpNegateRAState, L, CommonFields{}, Loc}; } /// .cfi_negate_ra_state_with_pc AArch64 negate RA state with PC. static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc); + return {OpNegateRAStateWithPC, L, CommonFields{}, Loc}; } /// .cfi_restore says that the rule for Register is now the same as it @@ -665,104 +646,106 @@ class MCCFIInstruction { /// by .cfi_startproc were executed. static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpRestore, L, Register, INT64_C(0), Loc); + return {OpRestore, L, CommonFields{Register}, Loc}; } /// .cfi_undefined From now on the previous value of Register can't be /// restored anymore. 
static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpUndefined, L, Register, INT64_C(0), Loc); + return {OpUndefined, L, CommonFields{Register}, Loc}; } /// .cfi_same_value Current value of Register is the same as in the /// previous frame. I.e., no restoration is needed. static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpSameValue, L, Register, INT64_C(0), Loc); + return {OpSameValue, L, CommonFields{Register}, Loc}; } /// .cfi_remember_state Save all current rules for all registers. static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpRememberState, L, 0, INT64_C(0), Loc); + return {OpRememberState, L, CommonFields{}, Loc}; } /// .cfi_restore_state Restore the previously saved state. static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpRestoreState, L, 0, INT64_C(0), Loc); + return {OpRestoreState, L, CommonFields{}, Loc}; } /// .cfi_escape Allows the user to add arbitrary bytes to the unwind /// info. static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc = {}, StringRef Comment = "") { - return MCCFIInstruction(OpEscape, L, 0, 0, Loc, Vals, Comment); + return {OpEscape, L, + EscapeFields{std::vector(Vals.begin(), Vals.end()), + Comment.str()}, + Loc}; } /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc = {}) { - return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, Loc); + return {OpGnuArgsSize, L, CommonFields{0, Size}, Loc}; } static MCCFIInstruction createLabel(MCSymbol *L, MCSymbol *CfiLabel, SMLoc Loc) { - return MCCFIInstruction(OpLabel, L, CfiLabel, Loc); + return {OpLabel, L, LabelFields{CfiLabel}, Loc}; } /// .cfi_val_offset Previous value of Register is offset Offset from the /// current CFA register. 
static MCCFIInstruction createValOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpValOffset, L, Register, Offset, Loc); + return {OpValOffset, L, CommonFields{Register, Offset}, Loc}; } OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } unsigned getRegister() const { - if (Operation == OpRegister) - return U.RR.Register; - if (Operation == OpLLVMDefAspaceCfa) - return U.RIA.Register; assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRestore || Operation == OpUndefined || Operation == OpSameValue || Operation == OpDefCfaRegister || - Operation == OpRelOffset || Operation == OpValOffset); - return U.RI.Register; + Operation == OpRelOffset || Operation == OpValOffset || + Operation == OpRegister || Operation == OpLLVMDefAspaceCfa); + return std::get(ExtraFields).Register; } unsigned getRegister2() const { assert(Operation == OpRegister); - return U.RR.Register2; + return std::get(ExtraFields).Register2; } unsigned getAddressSpace() const { assert(Operation == OpLLVMDefAspaceCfa); - return U.RIA.AddressSpace; + return std::get(ExtraFields).AddressSpace; } int64_t getOffset() const { - if (Operation == OpLLVMDefAspaceCfa) - return U.RIA.Offset; assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize || - Operation == OpValOffset); - return U.RI.Offset; + Operation == OpValOffset || Operation == OpLLVMDefAspaceCfa); + return std::get(ExtraFields).Offset; } MCSymbol *getCfiLabel() const { assert(Operation == OpLabel); - return U.CfiLabel; + return std::get(ExtraFields).CfiLabel; } StringRef getValues() const { assert(Operation == OpEscape); + auto &Values = std::get(ExtraFields).Values; return StringRef(&Values[0], Values.size()); } - StringRef getComment() const { return Comment; } + StringRef getComment() const { + assert(Operation == 
OpEscape); + return std::get(ExtraFields).Comment; + } SMLoc getLoc() const { return Loc; } }; From b3d52d3c8740c0c378d2696aaffb9a5bd87d50c2 Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Thu, 19 Jun 2025 10:59:31 -0400 Subject: [PATCH 2/5] [MC][Dwarf] Add custom CFI pseudo-ops for use in AMDGPU While these can be represented with .cfi_escape, using these pseudo-cfi instructions makes .s/.mir files more readable, and it is necessary to support updating registers in CFI instructions (something that the AMDGPU backend requires). --- llvm/include/llvm/MC/MCDwarf.h | 92 ++++++++- llvm/include/llvm/MC/MCStreamer.h | 18 ++ .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 33 ++++ llvm/lib/CodeGen/CFIInstrInserter.cpp | 4 + llvm/lib/CodeGen/MIRParser/MILexer.cpp | 5 + llvm/lib/CodeGen/MIRParser/MILexer.h | 4 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 75 +++++++ llvm/lib/CodeGen/MachineOperand.cpp | 58 ++++++ llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp | 10 + llvm/lib/MC/MCAsmStreamer.cpp | 76 +++++++ llvm/lib/MC/MCDwarf.cpp | 186 ++++++++++++++++++ llvm/lib/MC/MCParser/AsmParser.cpp | 106 ++++++++++ llvm/lib/MC/MCParser/MasmParser.cpp | 4 + llvm/lib/MC/MCStreamer.cpp | 54 +++++ .../heterogeneous-dwarf-cfi-directives.s | 57 ++++++ llvm/test/MC/ELF/cfi-register-pair.s | 56 ++++++ llvm/test/MC/ELF/cfi-vector-offset.s | 56 ++++++ llvm/test/MC/ELF/cfi-vector-registers.s | 56 ++++++ 18 files changed, 949 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s create mode 100644 llvm/test/MC/ELF/cfi-register-pair.s create mode 100644 llvm/test/MC/ELF/cfi-vector-offset.s create mode 100644 llvm/test/MC/ELF/cfi-vector-registers.s diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 640d2eeb68d61..e602f03de5ebf 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -530,6 +530,10 @@ class MCCFIInstruction { OpGnuArgsSize, OpLabel, OpValOffset, + OpLLVMRegisterPair, + 
OpLLVMVectorRegisters, + OpLLVMVectorOffset, + OpLLVMVectorRegisterMask, }; // Held in ExtraFields for most common OpTypes, exceptions follow. @@ -548,10 +552,45 @@ class MCCFIInstruction { struct LabelFields { MCSymbol *CfiLabel = nullptr; }; + /// Held in ExtraFields when OpLLVMRegisterPair. + struct RegisterPairFields { + unsigned Register; + unsigned Reg1, Reg2; + unsigned Reg1SizeInBits, Reg2SizeInBits; + }; + struct VectorRegisterWithLane { + unsigned Register; + unsigned Lane; + unsigned SizeInBits; + }; + /// Held in ExtraFields when OpLLVMVectorRegisters. + struct VectorRegistersFields { + unsigned Register; + std::vector VectorRegisters; + }; + /// Held in ExtraFields when OpLLVMVectorOffset. + struct VectorOffsetFields { + unsigned Register; + unsigned RegisterSizeInBits; + int64_t Offset; + unsigned MaskRegister; + unsigned MaskRegisterSizeInBits; + }; + /// Held in ExtraFields when OpLLVMVectorRegisterMask. + struct VectorRegisterMaskFields { + unsigned Register; + unsigned SpillRegister; + unsigned SpillRegisterLaneSizeInBits; + unsigned MaskRegister; + unsigned MaskRegisterSizeInBits; + }; private: MCSymbol *Label; - std::variant ExtraFields; + std::variant + ExtraFields; OpType Operation; SMLoc Loc; @@ -694,6 +733,57 @@ class MCCFIInstruction { return {OpLabel, L, LabelFields{CfiLabel}, Loc}; } + /// .cfi_llvm_register_pair Previous value of Register is saved in R1:R2. + static MCCFIInstruction + createLLVMRegisterPair(MCSymbol *L, unsigned Register, unsigned R1, + unsigned R1SizeInBits, unsigned R2, + unsigned R2SizeInBits, SMLoc Loc = {}) { + RegisterPairFields Extra{Register, R1, R2, R1SizeInBits, R2SizeInBits}; + return {OpLLVMRegisterPair, L, Extra, Loc}; + } + + /// .cfi_llvm_vector_registers Previous value of Register is saved in lanes of + /// vector registers. 
+ static MCCFIInstruction + createLLVMVectorRegisters(MCSymbol *L, unsigned Register, + std::vector VectorRegisters, + SMLoc Loc = {}) { + VectorRegistersFields Extra{Register, std::move(VectorRegisters)}; + return {OpLLVMVectorRegisters, L, std::move(Extra), Loc}; + } + + /// .cfi_llvm_vector_offset Previous value of Register is saved at Offset from + /// CFA. MaskRegister specifies the active lanes of register. + static MCCFIInstruction + createLLVMVectorOffset(MCSymbol *L, unsigned Register, + unsigned RegisterSizeInBits, unsigned MaskRegister, + unsigned MaskRegisterSizeInBits, int64_t Offset, + SMLoc Loc = {}) { + VectorOffsetFields Extra{Register, RegisterSizeInBits, Offset, MaskRegister, + MaskRegisterSizeInBits}; + return MCCFIInstruction(OpLLVMVectorOffset, L, Extra, Loc); + } + + /// .cfi_llvm_vector_register_mask Previous value of Register is saved in + /// SpillRegister, predicated on the value of MaskRegister. + static MCCFIInstruction createLLVMVectorRegisterMask( + MCSymbol *L, unsigned Register, unsigned SpillRegister, + unsigned SpillRegisterLaneSizeInBits, unsigned MaskRegister, + unsigned MaskRegisterSizeInBits, SMLoc Loc = {}) { + VectorRegisterMaskFields Extra{ + Register, SpillRegister, SpillRegisterLaneSizeInBits, + MaskRegister, MaskRegisterSizeInBits, + }; + return MCCFIInstruction(OpLLVMVectorRegisterMask, L, Extra, Loc); + } + + template ExtraFieldsTy &getExtraFields() { + return std::get(ExtraFields); + } + + template const ExtraFieldsTy &getExtraFields() const { + return std::get(ExtraFields); + } /// .cfi_val_offset Previous value of Register is offset Offset from the /// current CFA register. 
static MCCFIInstruction createValOffset(MCSymbol *L, unsigned Register, diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 79c715e3820a6..4e76aa323eb30 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1012,6 +1012,24 @@ class LLVM_ABI MCStreamer { SMLoc Loc = {}); virtual void emitCFIWindowSave(SMLoc Loc = {}); virtual void emitCFINegateRAState(SMLoc Loc = {}); + virtual void emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1SizeInBits, int64_t R2, + int64_t R2SizeInBits, SMLoc Loc = {}); + virtual void emitCFILLVMVectorRegisters( + int64_t Register, + std::vector VRs, + SMLoc Loc = {}); + virtual void emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + int64_t Offset, SMLoc Loc = {}); + virtual void + emitCFILLVMVectorRegisterMask(int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc = {}); + virtual void emitCFINegateRAStateWithPC(SMLoc Loc = {}); virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name); virtual void emitCFIValOffset(int64_t Register, int64_t Offset, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 2a146eb15f709..895c18abc56f9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -260,6 +260,39 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpRestoreState: OutStreamer->emitCFIRestoreState(Loc); break; + case MCCFIInstruction::OpLLVMRegisterPair: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMRegisterPair(Fields.Register, Fields.Reg1, + Fields.Reg1SizeInBits, Fields.Reg2, + Fields.Reg2SizeInBits, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorRegisters: { + 
const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorRegisters(Fields.Register, + Fields.VectorRegisters, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorOffset( + Fields.Register, Fields.RegisterSizeInBits, Fields.MaskRegister, + Fields.MaskRegisterSizeInBits, Fields.Offset, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorRegisterMask( + Fields.Register, Fields.SpillRegister, + Fields.SpillRegisterLaneSizeInBits, Fields.MaskRegister, + Fields.MaskRegisterSizeInBits); + break; + } + case MCCFIInstruction::OpValOffset: OutStreamer->emitCFIValOffset(Inst.getRegister(), Inst.getOffset(), Loc); break; diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 14098bc821617..0d60d17da0cf7 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -262,6 +262,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpGnuArgsSize: + case MCCFIInstruction::OpLLVMRegisterPair: + case MCCFIInstruction::OpLLVMVectorRegisters: + case MCCFIInstruction::OpLLVMVectorOffset: + case MCCFIInstruction::OpLLVMVectorRegisterMask: case MCCFIInstruction::OpLabel: case MCCFIInstruction::OpValOffset: break; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 8b72c295416a2..8ed590669a3b0 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -240,6 +240,11 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("window_save", MIToken::kw_cfi_window_save) .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + 
.Case("llvm_register_pair", MIToken::kw_cfi_llvm_register_pair) + .Case("llvm_vector_registers", MIToken::kw_cfi_llvm_vector_registers) + .Case("llvm_vector_offset", MIToken::kw_cfi_llvm_vector_offset) + .Case("llvm_vector_register_mask", + MIToken::kw_cfi_llvm_vector_register_mask) .Case("negate_ra_sign_state_with_pc", MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc) .Case("blockaddress", MIToken::kw_blockaddress) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 0627f176b9e00..abac1880f94e0 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -98,6 +98,10 @@ struct MIToken { kw_cfi_undefined, kw_cfi_window_save, kw_cfi_aarch64_negate_ra_sign_state, + kw_cfi_llvm_register_pair, + kw_cfi_llvm_vector_registers, + kw_cfi_llvm_vector_offset, + kw_cfi_llvm_vector_register_mask, kw_cfi_aarch64_negate_ra_sign_state_with_pc, kw_blockaddress, kw_intrinsic, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9dd6886..3618022d89bed 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -484,6 +484,7 @@ class MIParser { bool parseDILocation(MDNode *&Expr); bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); + bool parseCFIUnsigned(unsigned &Value); bool parseCFIRegister(unsigned &Reg); bool parseCFIAddressSpace(unsigned &AddressSpace); bool parseCFIEscapeValues(std::string& Values); @@ -2475,6 +2476,13 @@ bool MIParser::parseCFIOffset(int &Offset) { return false; } +bool MIParser::parseCFIUnsigned(unsigned &Value) { + if (getUnsigned(Value)) + return true; + lex(); + return false; +} + bool MIParser::parseCFIRegister(unsigned &Reg) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a cfi register"); @@ -2608,6 +2616,69 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { case MIToken::kw_cfi_aarch64_negate_ra_sign_state: CFIIndex = 
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); break; + case MIToken::kw_cfi_llvm_register_pair: { + unsigned Reg, R1, R2; + unsigned R1Size, R2Size; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(R1) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(R1Size) || expectAndConsume(MIToken::comma) || + parseCFIRegister(R2) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(R2Size)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMRegisterPair( + nullptr, Reg, R1, R1Size, R2, R2Size)); + break; + } + case MIToken::kw_cfi_llvm_vector_registers: { + std::vector VectorRegisters; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma)) + return true; + do { + unsigned VR; + unsigned Lane, Size; + if (parseCFIRegister(VR) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(Lane) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(Size)) + return true; + VectorRegisters.push_back({VR, Lane, Size}); + } while (consumeIfPresent(MIToken::comma)); + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorRegisters( + nullptr, Reg, std::move(VectorRegisters))); + break; + } + case MIToken::kw_cfi_llvm_vector_offset: { + unsigned Reg, MaskReg; + unsigned RegSize, MaskRegSize; + int Offset = 0; + + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(RegSize) || expectAndConsume(MIToken::comma) || + parseCFIRegister(MaskReg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(MaskRegSize) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorOffset( + nullptr, Reg, RegSize, MaskReg, MaskRegSize, Offset)); + break; + } + case MIToken::kw_cfi_llvm_vector_register_mask: { + unsigned Reg, SpillReg, MaskReg; + unsigned SpillRegLaneSize, MaskRegSize; + + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + 
parseCFIRegister(SpillReg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(SpillRegLaneSize) || + expectAndConsume(MIToken::comma) || parseCFIRegister(MaskReg) || + expectAndConsume(MIToken::comma) || parseCFIUnsigned(MaskRegSize)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorRegisterMask( + nullptr, Reg, SpillReg, SpillRegLaneSize, MaskReg, MaskRegSize)); + break; + } case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAStateWithPC(nullptr)); @@ -2962,6 +3033,10 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::kw_cfi_undefined: case MIToken::kw_cfi_window_save: case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + case MIToken::kw_cfi_llvm_register_pair: + case MIToken::kw_cfi_llvm_vector_registers: + case MIToken::kw_cfi_llvm_vector_offset: + case MIToken::kw_cfi_llvm_vector_register_mask: case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index bb9c76ff0c729..db802cc6e9024 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -778,6 +778,64 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, if (MCSymbol *Label = CFI.getLabel()) MachineOperand::printSymbol(OS, *Label); break; + case MCCFIInstruction::OpLLVMRegisterPair: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_register_pair "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", "; + printCFIRegister(Fields.Reg1, OS, TRI); + OS << ", " << Fields.Reg1SizeInBits << ", "; + printCFIRegister(Fields.Reg2, OS, TRI); + OS << ", " << Fields.Reg2SizeInBits; + break; + } + case MCCFIInstruction::OpLLVMVectorRegisters: { + const auto &Fields = + 
CFI.getExtraFields(); + + OS << "llvm_vector_registers "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + for (auto [Reg, Lane, Size] : Fields.VectorRegisters) { + OS << ", "; + printCFIRegister(Reg, OS, TRI); + OS << ", " << Lane << ", " << Size; + } + break; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_vector_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", " << Fields.RegisterSizeInBits << ", "; + printCFIRegister(Fields.MaskRegister, OS, TRI); + OS << ", " << Fields.MaskRegisterSizeInBits << ", " << Fields.Offset; + break; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_vector_register_mask "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", "; + printCFIRegister(Fields.SpillRegister, OS, TRI); + OS << ", " << Fields.SpillRegisterLaneSizeInBits << ", "; + printCFIRegister(Fields.MaskRegister, OS, TRI); + OS << ", " << Fields.MaskRegisterSizeInBits; + break; + } case MCCFIInstruction::OpNegateRAStateWithPC: OS << "negate_ra_sign_state_with_pc "; if (MCSymbol *Label = CFI.getLabel()) diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index bca820fa807c8..4d2d2da8a4445 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -161,6 +161,16 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) { CFIP.addInstruction(dwarf::DW_CFA_val_offset, Directive.getRegister(), Directive.getOffset()); break; + case MCCFIInstruction::OpLLVMRegisterPair: + case MCCFIInstruction::OpLLVMVectorRegisters: + case MCCFIInstruction::OpLLVMVectorOffset: + case 
MCCFIInstruction::OpLLVMVectorRegisterMask: + // TODO: These should be pretty straightforward to support, but are low + // priority. Similarly the implementation of OpLLVMDefAspaceCfa above + // seems incomplete and should be fixed. + Context->reportWarning(Directive.getLoc(), + "this directive is not supported, ignoring it"); + break; } return CFIP; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index be8c022f39ad1..6c54a9efbf2c6 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -369,6 +369,21 @@ class MCAsmStreamer final : public MCStreamer { void emitCFINegateRAState(SMLoc Loc) override; void emitCFINegateRAStateWithPC(SMLoc Loc) override; void emitCFIReturnColumn(int64_t Register) override; + void emitCFILLVMRegisterPair(int64_t Register, int64_t R1, int64_t R1Size, + int64_t R2, int64_t R2Size, SMLoc Loc) override; + void emitCFILLVMVectorRegisters( + int64_t Register, + std::vector VRs, + SMLoc Loc) override; + void emitCFILLVMVectorOffset(int64_t Register, int64_t RegisterSize, + int64_t MaskRegister, int64_t MaskRegisterSize, + int64_t Offset, SMLoc Loc) override; + void emitCFILLVMVectorRegisterMask(int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + SMLoc Loc) override; + void emitCFILabelDirective(SMLoc Loc, StringRef Name) override; void emitCFIValOffset(int64_t Register, int64_t Offset, SMLoc Loc) override; @@ -2101,6 +2116,67 @@ void MCAsmStreamer::emitCFIRegister(int64_t Register1, int64_t Register2, EmitEOL(); } +void MCAsmStreamer::emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1Size, int64_t R2, + int64_t R2Size, SMLoc Loc) { + MCStreamer::emitCFILLVMRegisterPair(Register, R1, R1Size, R2, R2Size, Loc); + + OS << "\t.cfi_llvm_register_pair "; + EmitRegisterName(Register); + OS << ", "; + EmitRegisterName(R1); + OS << ", " << R1Size << ", "; + EmitRegisterName(R2); + OS << ", " << R2Size; 
+ EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorRegisters( + int64_t Register, std::vector VRs, + SMLoc Loc) { + MCStreamer::emitCFILLVMVectorRegisters(Register, VRs, Loc); + + OS << "\t.cfi_llvm_vector_registers "; + EmitRegisterName(Register); + for (auto [Reg, Lane, Size] : VRs) + OS << ", " << Reg << ", " << Lane << ", " << Size; + EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSize, + int64_t MaskRegister, + int64_t MaskRegisterSize, + int64_t Offset, SMLoc Loc) { + MCStreamer::emitCFILLVMVectorOffset(Register, RegisterSize, MaskRegister, + MaskRegisterSize, Offset, Loc); + + OS << "\t.cfi_llvm_vector_offset "; + EmitRegisterName(Register); + OS << ", " << RegisterSize << ", "; + EmitRegisterName(MaskRegister); + OS << ", " << MaskRegisterSize << ", " << Offset; + EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorRegisterMask( + int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc) { + MCStreamer::emitCFILLVMVectorRegisterMask( + Register, SpillRegister, SpillRegisterLaneSizeInBits, MaskRegister, + MaskRegisterSizeInBits, Loc); + + OS << "\t.cfi_llvm_vector_register_mask "; + EmitRegisterName(Register); + OS << ", "; + EmitRegisterName(SpillRegister); + OS << ", " << SpillRegisterLaneSizeInBits << ", "; + EmitRegisterName(MaskRegister); + OS << ", " << MaskRegisterSizeInBits; + EmitEOL(); +} + void MCAsmStreamer::emitCFIWindowSave(SMLoc Loc) { MCStreamer::emitCFIWindowSave(Loc); OS << "\t.cfi_window_save"; diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index e8f000a584839..09a93dd34ece3 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1377,6 +1377,16 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) { Streamer.emitInt8(Encoding); } +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + assert(DwarfReg >= 0); + if 
(DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { int dataAlignmentFactor = getDataAlignmentFactor(Streamer); auto *MRI = Streamer.getContext().getRegisterInfo(); @@ -1521,6 +1531,7 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpEscape: Streamer.emitBytes(Instr.getValues()); return; + case MCCFIInstruction::OpLabel: Streamer.emitLabel(Instr.getCfiLabel(), Instr.getLoc()); return; @@ -1543,7 +1554,182 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { } return; } + case MCCFIInstruction::OpLLVMRegisterPair: { + // CFI for a register spilled to a pair of SGPRs is implemented as an + // expression(E) rule where E is a composite location description with + // multiple parts each referencing SGPR register location storage with a bit + // offset of 0. In other words we generate the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_piece ) + // (DW_OP_regx ) (DW_OP_piece ) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaluation to be the location description on the top of the stack (i.e. + // the implicitly pushed one is just ignored.) 
+ + const auto &Fields = + Instr.getExtraFields(); + + SmallString<10> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Reg1, OSBlock); + if (Fields.Reg1SizeInBits % 8 == 0) { + OSBlock << uint8_t(dwarf::DW_OP_piece); + encodeULEB128(Fields.Reg1SizeInBits / 8, OSBlock); + } else { + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(Fields.Reg1SizeInBits, OSBlock); + encodeULEB128(0, OSBlock); + } + encodeDwarfRegisterLocation(Fields.Reg2, OSBlock); + if (Fields.Reg2SizeInBits % 8 == 0) { + OSBlock << uint8_t(dwarf::DW_OP_piece); + encodeULEB128(Fields.Reg2SizeInBits / 8, OSBlock); + } else { + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(Fields.Reg2SizeInBits, OSBlock); + encodeULEB128(0, OSBlock); + } + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; } + case MCCFIInstruction::OpLLVMVectorRegisters: { + // CFI for an SGPR spilled to multiple lanes of VGPRs is implemented as an + // expression(E) rule where E is a composite location description with + // multiple parts each referencing VGPR register location storage with a bit + // offset of the lane index multiplied by the size of a lane. In other words + // we generate the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_bit_piece , *) + // (DW_OP_regx ) (DW_OP_bit_piece , *) + // ... 
+ // (DW_OP_regx ) (DW_OP_bit_piece , *) + // + // However if we're only using a single lane then we can emit a slightly + // more optimal form: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_LLVM_offset_uconst *) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaluation to be the location description on the top of the stack (i.e. + // the implicitly pushed one is just ignored.) + + const auto &Fields = + Instr.getExtraFields(); + auto &VRs = Fields.VectorRegisters; + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + + if (VRs.size() == 1 && VRs[0].SizeInBits % 8 == 0) { + encodeDwarfRegisterLocation(VRs[0].Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + encodeULEB128((VRs[0].SizeInBits / 8) * VRs[0].Lane, OSBlock); + } else { + for (const auto &VR : VRs) { + // TODO: Detect when we can merge multiple adjacent pieces, or even + // reduce this to a register location description (when all pieces are + // adjacent). + encodeDwarfRegisterLocation(VR.Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(VR.SizeInBits, OSBlock); + encodeULEB128(VR.SizeInBits * VR.Lane, OSBlock); + } + } + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + // CFI for a vector register spilled to memory is implemented as an + // expression(E) rule where E is a location description. 
+ // + // DW_CFA_expression: , + // (DW_OP_regx ) + // (DW_OP_swap) + // (DW_OP_LLVM_offset_uconst ) + // (DW_OP_LLVM_call_frame_entry_reg ) + // (DW_OP_deref_size ) + // (DW_OP_LLVM_select_bit_piece ) + + const auto &Fields = + Instr.getExtraFields(); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_swap); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + encodeULEB128(Fields.Offset, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_call_frame_entry_reg); + encodeULEB128(Fields.MaskRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size); + OSBlock << uint8_t(Fields.MaskRegisterSizeInBits / 8); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_select_bit_piece); + encodeULEB128(Fields.RegisterSizeInBits, OSBlock); + encodeULEB128(Fields.MaskRegisterSizeInBits, OSBlock); + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + // CFI for a VGPR/AGPR partially spilled to another VGPR/AGPR dependent on + // an EXEC mask is implemented as an expression(E) rule where E is a + // location description. 
+ // + // DW_CFA_expression: , + // (DW_OP_regx ) + // (DW_OP_regx ) + // (DW_OP_LLVM_call_frame_entry_reg ) + // (DW_OP_deref_size ) + // (DW_OP_LLVM_select_bit_piece ) + + const auto Fields = + Instr.getExtraFields(); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Register, OSBlock); + encodeDwarfRegisterLocation(Fields.SpillRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_call_frame_entry_reg); + encodeULEB128(Fields.MaskRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size) + << uint8_t(Fields.MaskRegisterSizeInBits / 8); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_select_bit_piece); + encodeULEB128(Fields.SpillRegisterLaneSizeInBits, OSBlock); + encodeULEB128(Fields.MaskRegisterSizeInBits, OSBlock); + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + } + llvm_unreachable("Unhandled case in switch"); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index acea3ab23680a..0a3f22083a51d 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -493,6 +493,10 @@ class AsmParser : public MCAsmParser { DK_CFI_LLVM_DEF_ASPACE_CFA, DK_CFI_OFFSET, DK_CFI_REL_OFFSET, + DK_CFI_LLVM_REGISTER_PAIR, + DK_CFI_LLVM_VECTOR_REGISTERS, + DK_CFI_LLVM_VECTOR_OFFSET, + DK_CFI_LLVM_VECTOR_REGISTER_MASK, DK_CFI_PERSONALITY, DK_CFI_LSDA, DK_CFI_REMEMBER_STATE, @@ -610,6 +614,10 @@ class AsmParser : public MCAsmParser { bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc); bool parseDirectiveCFISignalFrame(SMLoc DirectiveLoc); bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMRegisterPair(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMVectorRegisters(SMLoc DirectiveLoc); + 
bool parseDirectiveCFILLVMVectorOffset(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMVectorRegisterMask(SMLoc DirectiveLoc); bool parseDirectiveCFILabel(SMLoc DirectiveLoc); bool parseDirectiveCFIValOffset(SMLoc DirectiveLoc); @@ -2116,6 +2124,14 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveCFIOffset(IDLoc); case DK_CFI_REL_OFFSET: return parseDirectiveCFIRelOffset(IDLoc); + case DK_CFI_LLVM_REGISTER_PAIR: + return parseDirectiveCFILLVMRegisterPair(IDLoc); + case DK_CFI_LLVM_VECTOR_REGISTERS: + return parseDirectiveCFILLVMVectorRegisters(IDLoc); + case DK_CFI_LLVM_VECTOR_OFFSET: + return parseDirectiveCFILLVMVectorOffset(IDLoc); + case DK_CFI_LLVM_VECTOR_REGISTER_MASK: + return parseDirectiveCFILLVMVectorRegisterMask(IDLoc); case DK_CFI_PERSONALITY: return parseDirectiveCFIPersonalityOrLsda(true); case DK_CFI_LSDA: @@ -4410,6 +4426,91 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveCFILLVMRegisterPair +/// ::= .cfi_llvm_register_pair reg, r1, r1size, r2, r2size +bool AsmParser::parseDirectiveCFILLVMRegisterPair(SMLoc DirectiveLoc) { + int64_t Register = 0; + int64_t R1 = 0, R2 = 0; + int64_t R1Size = 0, R2Size = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseRegisterOrRegisterNumber(R1, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(R1Size) || parseComma() || + parseRegisterOrRegisterNumber(R2, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(R2Size) || parseEOL()) + return true; + + getStreamer().emitCFILLVMRegisterPair(Register, R1, R1Size, R2, R2Size, + DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorRegisters +/// ::= .cfi_llvm_vector_registers reg, vreg0, vlane0, vreg0size, +bool AsmParser::parseDirectiveCFILLVMVectorRegisters(SMLoc DirectiveLoc) { + int64_t Register = 0; + std::vector VRs; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma()) + return true; 
+ + do { + int64_t VectorRegister = 0; + int64_t Lane = 0; + int64_t Size = 0; + if (parseRegisterOrRegisterNumber(VectorRegister, DirectiveLoc) || + parseComma() || parseIntToken(Lane, "expected a lane number") || + parseComma() || parseAbsoluteExpression(Size)) + return true; + VRs.push_back({unsigned(VectorRegister), unsigned(Lane), unsigned(Size)}); + } while (parseOptionalToken(AsmToken::Comma)); + + if (parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorRegisters(Register, std::move(VRs), + DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorOffset +/// ::= .cfi_llvm_vector_offset register, register-size, mask, mask-size, offset +bool AsmParser::parseDirectiveCFILLVMVectorOffset(SMLoc DirectiveLoc) { + int64_t Register = 0, MaskRegister = 0; + int64_t RegisterSize = 0, MaskRegisterSize = 0; + int64_t Offset = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(RegisterSize) || parseComma() || + parseRegisterOrRegisterNumber(MaskRegister, DirectiveLoc) || + parseComma() || parseAbsoluteExpression(MaskRegisterSize) || + parseComma() || parseAbsoluteExpression(Offset) || parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorOffset(Register, RegisterSize, MaskRegister, + MaskRegisterSize, Offset, DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorRegisterMask +/// ::= .cfi_llvm_vector_register_mask register, spill-reg, spill-reg-lane-size, +/// mask-reg, mask-reg-size +bool AsmParser::parseDirectiveCFILLVMVectorRegisterMask(SMLoc DirectiveLoc) { + int64_t Register = 0, SpillReg = 0, MaskReg = 0; + int64_t SpillRegLaneSize = 0, MaskRegSize = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseRegisterOrRegisterNumber(SpillReg, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(SpillRegLaneSize) || parseComma() || + parseRegisterOrRegisterNumber(MaskReg, DirectiveLoc) || parseComma() || + 
parseAbsoluteExpression(MaskRegSize) || parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorRegisterMask( + Register, SpillReg, SpillRegLaneSize, MaskReg, MaskRegSize, DirectiveLoc); + return false; +} + /// parseDirectiveCFILabel /// ::= .cfi_label label bool AsmParser::parseDirectiveCFILabel(SMLoc Loc) { @@ -5444,6 +5545,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cfi_llvm_def_aspace_cfa"] = DK_CFI_LLVM_DEF_ASPACE_CFA; DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; + DirectiveKindMap[".cfi_llvm_register_pair"] = DK_CFI_LLVM_REGISTER_PAIR; + DirectiveKindMap[".cfi_llvm_vector_registers"] = DK_CFI_LLVM_VECTOR_REGISTERS; + DirectiveKindMap[".cfi_llvm_vector_offset"] = DK_CFI_LLVM_VECTOR_OFFSET; + DirectiveKindMap[".cfi_llvm_vector_register_mask"] = + DK_CFI_LLVM_VECTOR_REGISTER_MASK; DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index d4901d95e565a..55b1bb8c22689 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -5275,6 +5275,10 @@ void MasmParser::initializeDirectiveKindMap() { // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER; // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; + // DirectiveKindMap[".cfi_llvm_register_pair"] = DK_CFI_LLVM_REGISTER_PAIR; + // DirectiveKindMap[".cfi_llvm_vector_registers"] = + // DK_CFI_LLVM_VECTOR_REGISTERS; + // DirectiveKindMap[".cfi_llvm_vector_offset"] = DK_CFI_LLVM_VECTOR_OFFSET; // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; diff --git a/llvm/lib/MC/MCStreamer.cpp 
b/llvm/lib/MC/MCStreamer.cpp index bc7398120096e..27a87a6281340 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -630,6 +630,60 @@ void MCStreamer::emitCFIGnuArgsSize(int64_t Size, SMLoc Loc) { CurFrame->Instructions.push_back(std::move(Instruction)); } +void MCStreamer::emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1Size, int64_t R2, + int64_t R2Size, SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMRegisterPair( + Label, Register, R1, R1Size, R2, R2Size, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorRegisters( + int64_t Register, std::vector VRs, + SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorRegisters( + Label, Register, std::move(VRs), Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + int64_t Offset, SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorOffset( + Label, Register, RegisterSizeInBits, MaskRegister, MaskRegisterSizeInBits, + Offset, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorRegisterMask( + int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc) { + + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorRegisterMask( + Label, 
Register, SpillRegister, SpillRegisterLaneSizeInBits, MaskRegister, + MaskRegisterSizeInBits, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + void MCStreamer::emitCFISignalFrame() { MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); if (!CurFrame) diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s new file mode 100644 index 0000000000000..d742cfc49689c --- /dev/null +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s @@ -0,0 +1,57 @@ +; RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj %s | llvm-dwarfdump -debug-frame - | FileCheck %s + +.text +.cfi_sections .debug_frame + +; CHECK-NOT: DW_CFA_expression + +register_pair: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: PC_REG DW_OP_regx SGPR30, DW_OP_piece 0x4, DW_OP_regx SGPR31, DW_OP_piece 0x4 + .cfi_llvm_register_pair 16, 62, 32, 63, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_registers: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: PC_REG DW_OP_regx 0x67f, DW_OP_bit_piece 0x20 0x0, DW_OP_regx 0x67f, DW_OP_bit_piece 0x20 0x20 + .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_registers_single: + .cfi_startproc + s_nop 2 + ;; Note that 0x2c below is the offset in the VGPR, so 4 (bytes, vgpr lane size) * 11 (the lane). 
+ ; CHECK: DW_CFA_expression: SGPR45 DW_OP_regx VGPR41, DW_OP_LLVM_user DW_OP_LLVM_offset_uconst 0x2c + .cfi_llvm_vector_registers 77, 2601, 11, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_offsets: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: VGPR40 DW_OP_regx VGPR40, DW_OP_swap, DW_OP_LLVM_user DW_OP_LLVM_offset_uconst 0x100, DW_OP_LLVM_user DW_OP_LLVM_call_frame_entry_reg EXEC, DW_OP_deref_size 0x8, DW_OP_LLVM_user DW_OP_LLVM_select_bit_piece 0x20 0x40 + .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_register_mask: + .cfi_startproc + s_nop 0 + ; CHECK: DW_CFA_expression: VGPR40 DW_OP_regx VGPR40, DW_OP_regx AGPR0, DW_OP_LLVM_user DW_OP_LLVM_call_frame_entry_reg EXEC, DW_OP_deref_size 0x8, DW_OP_LLVM_user DW_OP_LLVM_select_bit_piece 0x20 0x40 + .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 + s_nop 0 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression diff --git a/llvm/test/MC/ELF/cfi-register-pair.s b/llvm/test/MC/ELF/cfi-register-pair.s new file mode 100644 index 0000000000000..05ef8e9ae2a4d --- /dev/null +++ b/llvm/test/MC/ELF/cfi-register-pair.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_register_pair 16, 62, 32, 63, 32 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 56 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// 
CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// CHECK-NEXT: 0010: 1B000000 20000000 18000000 00000000 |.... ...........| +// CHECK-NEXT: 0020: 08000000 00411010 08903E93 04903F93 |.....A....>...?.| +// CHECK-NEXT: 0030: 04000000 00000000 |........| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } diff --git a/llvm/test/MC/ELF/cfi-vector-offset.s b/llvm/test/MC/ELF/cfi-vector-offset.s new file mode 100644 index 0000000000000..7817396b8f316 --- /dev/null +++ b/llvm/test/MC/ELF/cfi-vector-offset.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 64 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// 
CHECK-NEXT: 0010: 1B000000 28000000 18000000 00000000 |....(...........| +// CHECK-NEXT: 0020: 08000000 004110A8 141190A8 1416E905 |.....A..........| +// CHECK-NEXT: 0030: 8002E907 119408E9 0C204000 00000000 |......... @.....| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } diff --git a/llvm/test/MC/ELF/cfi-vector-registers.s b/llvm/test/MC/ELF/cfi-vector-registers.s new file mode 100644 index 0000000000000..76f001007a272 --- /dev/null +++ b/llvm/test/MC/ELF/cfi-vector-registers.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa -mcpu=gfx908 %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 56 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// CHECK-NEXT: 0010: 1B000000 20000000 18000000 00000000 |.... 
...........| +// CHECK-NEXT: 0020: 08000000 00411010 0C90FF0C 9D200090 |.....A....... ..| +// CHECK-NEXT: 0030: FF0C9D20 20000000 |... ...| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } From cbbe61354060f33e9afa965e3cb58e15bd404046 Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Wed, 9 Jul 2025 12:20:01 -0400 Subject: [PATCH 3/5] [AMDGPU] Emit entry function Dwarf CFI Entry functions represent the end of unwinding, as they are the outer-most frame. This implies they can only have a meaningful definition for the CFA, which AMDGPU defines using a memory location description with a literal private address space address. The return address is set to undefined as a sentinel value to signal the end of unwinding. 
Co-authored-by: Scott Linder Co-authored-by: Venkata Ramanaiah Nalamothu --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 57 +- llvm/lib/Target/AMDGPU/SIFrameLowering.h | 6 + .../memory-legalizer-atomic-fence.ll | 480 ++++++ .../av-spill-expansion-with-machine-cp.mir | 8 + .../branch-folding-implicit-def-subreg.ll | 2 + .../test/CodeGen/AMDGPU/dbg-info-inline-at.ll | 2 + llvm/test/CodeGen/AMDGPU/debug-frame.ll | 1405 +++++++++++++++++ .../eliminate-frame-index-s-add-i32.mir | 124 +- .../eliminate-frame-index-s-add-u32.mir | 24 +- ...minate-frame-index-v-add-co-u32-wave32.mir | 28 +- .../eliminate-frame-index-v-add-co-u32.mir | 140 +- .../eliminate-frame-index-v-add-u32.mir | 216 ++- .../CodeGen/AMDGPU/entry-function-cfi.mir | 34 + .../frame-index-elimination-tied-operand.mir | 2 + .../CodeGen/AMDGPU/inflate-av-remat-imm.mir | 6 + ...sue98474-assigned-physreg-interference.mir | 2 + ...egrewriter-live-out-undef-subregisters.mir | 10 + .../AMDGPU/kernel-mubuf-with-voffset.mir | 2 + llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll | 2 + ...al-regcopy-and-spill-missed-at-regalloc.ll | 4 + .../AMDGPU/pei-reg-scavenger-position.mir | 2 + .../regalloc-introduces-copy-sgpr-to-agpr.mir | 2 + .../sgpr-spill-dead-frame-in-dbg-value.mir | 2 + llvm/test/CodeGen/AMDGPU/sgpr-spill.mir | 12 + .../CodeGen/AMDGPU/spill-special-sgpr.mir | 6 + .../transform-block-with-return-to-epilog.ll | 8 + 26 files changed, 2547 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/debug-frame.ll create mode 100644 llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 5c39f7a3d6daa..71356aa2aced1 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -12,8 +12,10 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include 
"llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" @@ -43,6 +45,15 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, return MCRegister(); } +static bool needsFrameMoves(const MachineFunction &MF) { + // FIXME: There are some places in the compiler which are sensitive to the CFI + // pseudos and so using MachineFunction::needsFrameMoves has the unintended + // effect of making enabling debug info affect codegen. Once we have + // identified and fixed those cases this should be replaced with + // MF.needsFrameMoves() + return true; +} + // Find a scratch register that we can use in the prologue. We avoid using // callee-save registers since they may appear to be free when this is called // from canUseAsPrologue (during shrink wrapping), but then no longer be free @@ -615,10 +626,39 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); assert(MFI->isEntryFunction()); + // Debug location must be unknown since the first debug location is used to + // determine the end of the prologue. + DebugLoc DL; + MachineBasicBlock::iterator I = MBB.begin(); + + if (needsFrameMoves(MF)) { + // On entry the SP/FP are not set up, so we need to define the CFA in terms + // of a literal location expression. 
+ static const char CFAEncodedInstUserOpsArr[] = { + dwarf::DW_CFA_def_cfa_expression, + 4, // length + static_cast(dwarf::DW_OP_lit0), + static_cast(dwarf::DW_OP_lit0 + + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave), + static_cast(dwarf::DW_OP_LLVM_user), + static_cast(dwarf::DW_OP_LLVM_form_aspace_address)}; + static StringRef CFAEncodedInstUserOps = + StringRef(CFAEncodedInstUserOpsArr, sizeof(CFAEncodedInstUserOpsArr)); + buildCFI(MBB, I, DL, + MCCFIInstruction::createEscape(nullptr, CFAEncodedInstUserOps, + SMLoc(), + "CFA is 0 in private_wave aspace")); + // Unwinding halts when the return address (PC) is undefined. + buildCFI(MBB, I, DL, + MCCFIInstruction::createUndefined( + nullptr, MCRI->getDwarfRegNum(AMDGPU::PC_REG, false))); + } + Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); @@ -655,11 +695,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } } - // Debug location must be unknown since the first debug location is used to - // determine the end of the prologue. - DebugLoc DL; - MachineBasicBlock::iterator I = MBB.begin(); - // We found the SRSRC first because it needs four registers and has an // alignment requirement. If the SRSRC that we found is clobbering with // the scratch wave offset, which may be in a fixed SGPR or a free SGPR @@ -2210,3 +2245,15 @@ bool SIFrameLowering::requiresStackPointerReference( // references the SP, like variable sized stack objects. 
return frameTriviallyRequiresSP(MFI); } + +MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag) const { + MachineFunction &MF = *MBB.getParent(); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + return BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFIInst)) + .setMIFlag(flag); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index a72772987262e..0b691d8f15a48 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -104,6 +104,12 @@ class SIFrameLowering final : public AMDGPUFrameLowering { public: bool requiresStackPointerReference(const MachineFunction &MF) const; + /// Create a CFI index for CFIInst and build a MachineInstr around it. + MachineInstr * + buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const; + // Returns true if the function may need to reserve space on the stack for the // CWSR trap handler. 
bool mayReserveScratchForCWSR(const MachineFunction &MF) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index e86f7473363f7..c037a93af124b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -13,18 +13,24 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; GFX6-LABEL: name: system_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -33,6 +39,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: system_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -41,6 +49,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; 
GFX11WGP-LABEL: name: system_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -49,6 +59,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -62,34 +74,46 @@ entry: define amdgpu_kernel void @system_one_as_release() #0 { ; GFX6-LABEL: name: system_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -101,18 +125,24 @@ entry: define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; GFX6-LABEL: name: system_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -121,6 +151,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: 
system_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -129,6 +161,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -137,6 +171,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -150,18 +186,24 @@ entry: define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; GFX6-LABEL: name: system_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_seq_cst ; GFX10WGP: bb.0.entry: + 
; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -170,6 +212,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -178,6 +222,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -186,6 +232,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -199,26 +247,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acquire() #0 { ; GFX6-LABEL: name: singlethread_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acquire ; 
GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acquire @@ -228,26 +288,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_release() #0 { ; GFX6-LABEL: name: singlethread_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 
0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") release @@ -257,26 +329,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acq_rel() #0 { ; GFX6-LABEL: name: singlethread_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acq_rel @@ -286,26 +370,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_seq_cst ; 
GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") seq_cst @@ -315,18 +411,24 @@ entry: define amdgpu_kernel void @agent_one_as_acquire() #0 { ; GFX6-LABEL: name: agent_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -335,6 +437,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -343,6 +447,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + 
; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -351,6 +457,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -364,34 +472,46 @@ entry: define amdgpu_kernel void @agent_one_as_release() #0 { ; GFX6-LABEL: name: agent_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_release ; 
GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -403,18 +523,24 @@ entry: define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; GFX6-LABEL: name: agent_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -423,6 +549,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined 
$pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -431,6 +559,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -439,6 +569,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -452,18 +584,24 @@ entry: define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; GFX6-LABEL: name: agent_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; 
GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -472,6 +610,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -480,6 +620,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -488,6 +630,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -501,14 +645,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; GFX6-LABEL: name: workgroup_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: 
name: workgroup_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -516,10 +666,14 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -527,6 +681,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acquire @@ -536,14 +692,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_release() #0 { ; GFX6-LABEL: name: workgroup_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; 
; GFX10WGP-LABEL: name: workgroup_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -551,6 +713,8 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -558,12 +722,16 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -575,14 +743,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -591,6 +765,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -598,6 +774,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -605,6 +783,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -616,14 +796,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -632,6 +818,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -639,6 +827,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -646,6 +836,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: 
S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -657,26 +849,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acquire() #0 { ; GFX6-LABEL: name: wavefront_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acquire @@ -686,26 +890,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_release() #0 { ; GFX6-LABEL: name: wavefront_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") release @@ -715,26 +931,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acq_rel @@ -744,26 +972,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined 
$pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") seq_cst @@ -773,18 +1013,24 @@ entry: define amdgpu_kernel void @system_acquire() #0 { ; GFX6-LABEL: name: system_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -793,6 +1039,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX10CU-LABEL: name: system_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft 
undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -801,6 +1049,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11WGP-LABEL: name: system_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -809,6 +1059,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11CU-LABEL: name: system_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -822,34 +1074,46 @@ entry: define amdgpu_kernel void @system_release() #0 { ; GFX6-LABEL: name: system_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -861,18 +1125,24 @@ entry: define amdgpu_kernel void @system_acq_rel() #0 { ; GFX6-LABEL: name: system_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -881,6 +1151,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX10CU-LABEL: name: 
system_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -889,6 +1161,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -897,6 +1171,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -910,18 +1186,24 @@ entry: define amdgpu_kernel void @system_seq_cst() #0 { ; GFX6-LABEL: name: system_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -930,6 +1212,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -938,6 +1222,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -946,6 +1232,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -959,26 +1247,38 @@ entry: define amdgpu_kernel void @singlethread_acquire() #0 { ; GFX6-LABEL: name: singlethread_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acquire @@ -988,26 +1288,38 @@ entry: define amdgpu_kernel void @singlethread_release() #0 { ; GFX6-LABEL: name: singlethread_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") release @@ -1017,26 +1329,38 @@ entry: define amdgpu_kernel void @singlethread_acq_rel() #0 { ; GFX6-LABEL: name: singlethread_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acq_rel @@ -1046,26 +1370,38 @@ entry: define amdgpu_kernel void @singlethread_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") seq_cst @@ -1075,18 +1411,24 @@ entry: define amdgpu_kernel void @agent_acquire() #0 { 
; GFX6-LABEL: name: agent_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1095,6 +1437,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1103,6 +1447,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1111,6 +1457,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 
0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1124,34 +1472,46 @@ entry: define amdgpu_kernel void @agent_release() #0 { ; GFX6-LABEL: name: agent_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1163,18 +1523,24 @@ entry: define amdgpu_kernel void @agent_acq_rel() #0 { ; GFX6-LABEL: name: agent_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1183,6 +1549,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1191,6 +1559,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: 
S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1199,6 +1569,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1212,18 +1584,24 @@ entry: define amdgpu_kernel void @agent_seq_cst() #0 { ; GFX6-LABEL: name: agent_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1232,6 +1610,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1240,6 +1620,8 @@ 
define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1248,6 +1630,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1261,16 +1645,22 @@ entry: define amdgpu_kernel void @workgroup_acquire() #0 { ; GFX6-LABEL: name: workgroup_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1278,11 +1668,15 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 
0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 49279 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1290,6 +1684,8 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 64519 ; GFX11CU-NEXT: S_ENDPGM 0 entry: @@ -1300,16 +1696,22 @@ entry: define amdgpu_kernel void @workgroup_release() #0 { ; GFX6-LABEL: name: workgroup_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1317,6 +1719,8 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1324,12 +1728,16 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1341,16 +1749,22 @@ entry: define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1359,6 +1773,8 @@ 
define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1366,6 +1782,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: workgroup_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1373,6 +1791,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1384,16 +1804,22 @@ entry: define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1402,6 +1828,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1409,6 +1837,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1416,6 +1846,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1427,26 +1859,38 @@ entry: define amdgpu_kernel void @wavefront_acquire() #0 { ; GFX6-LABEL: name: wavefront_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: 
S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") acquire @@ -1456,26 +1900,38 @@ entry: define amdgpu_kernel void @wavefront_release() #0 { ; GFX6-LABEL: name: wavefront_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") release @@ -1485,26 +1941,38 @@ entry: define amdgpu_kernel void @wavefront_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") acq_rel @@ -1514,26 +1982,38 @@ entry: define amdgpu_kernel void @wavefront_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir index dfe4b8a33f396..02856a31d2fb7 100644 --- 
a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir +++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir @@ -21,6 +21,8 @@ body: | ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -31,6 +33,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -63,6 +67,8 @@ body: | ; 
GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec @@ -79,6 +85,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll 
b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index d89b39348ad9a..863177ae3d6b5 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -7,6 +7,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17, $sgpr12_sgpr13 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0 ; GFX90A-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr12, $sgpr17, implicit-def $scc ; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll index ed609f85918f9..20077fa5d96a7 100644 --- a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll +++ b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll @@ -8,6 +8,8 @@ define amdgpu_kernel void @_Z3fooPiiii(ptr addrspace(1) nocapture noundef writeo ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 ; CHECK-NEXT: .file 1 "." 
"a.h" ; CHECK-NEXT: .loc 1 5 12 prologue_end ; ./a.h:5:12 @[ a.hip:12:8 ] ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll new file mode 100644 index 0000000000000..40ff6ccf0cb0f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -0,0 +1,1405 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=0 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-DIS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=1 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-EN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +define protected amdgpu_kernel void @kern1() #0 { +; CHECK-LABEL: kern1: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 +; CHECK-NEXT: s_endpgm +entry: + ret void +} + +define hidden void @func_no_clobber() #0 { +; CHECK-LABEL: func_no_clobber: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + ret void +} + +define void @callee_need_to_spill_fp_to_memory() #1 { +; GFX900-LABEL: callee_need_to_spill_fp_to_memory: +; GFX900: .Lfunc_begin2: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s40, s33 +; 
GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte 
Folded Spill +; GFX900-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded 
Spill +; GFX900-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte 
Folded Spill +; GFX900-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 
4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill 
+; GFX900-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber nonpreserved SGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber all VGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword 
v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword 
v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX900-NEXT: 
buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX900-NEXT: 
buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword 
v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX900-NEXT: s_addk_i32 s32, 0x7100 +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: s_mov_b32 s33, s40 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-DIS: .Lfunc_begin2: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; 
GFX90A-V2A-DIS-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword 
v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v138, off, s[0:3], 
s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 
4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; 
GFX90A-V2A-DIS-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword 
v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber all VGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v222, off, s[0:3], 
s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte 
Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; 
GFX90A-V2A-DIS-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; 
GFX90A-V2A-DIS-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: 
buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-EN: .Lfunc_begin2: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; 
Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a16, v72 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a17, v73 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a18, v74 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a19, v75 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a20, v76 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a21, v77 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a22, v78 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a23, v79 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a24, v88 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a25, v89 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a26, v90 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a27, v91 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a28, v92 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a29, v93 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a30, v94 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a31, v95 
; Reload Reuse +; GFX90A-V2A-EN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: 
buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v184, off, s[0:3], 
s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; 
GFX90A-V2A-EN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; 
GFX90A-V2A-EN-NEXT: ; clobber all VGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded 
Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: 
buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v139, off, s[0:3], 
s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x5100 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v95, a31 ; 
Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v94, a30 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v93, a29 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v92, a28 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v91, a27 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v90, a26 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v89, a25 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v88, a24 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v79, a23 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v78, a22 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v77, a21 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v76, a20 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v75, a19 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v74, a18 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v73, a17 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v72, a16 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; 
GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: callee_need_to_spill_fp_to_memory: +; WAVE32: .Lfunc_begin2: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s40, s33 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 
offset:380 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 
4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 
; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v202, off, s[0:3], s33 
offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 
4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber nonpreserved SGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber all VGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x3e +; WAVE32-NEXT: buffer_load_dword v255, off, s[0:3], s33 +; WAVE32-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 +; WAVE32-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 +; WAVE32-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 +; WAVE32-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 +; WAVE32-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 +; WAVE32-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 +; WAVE32-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 +; WAVE32-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 +; WAVE32-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 +; WAVE32-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 +; WAVE32-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 +; WAVE32-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 +; WAVE32-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 +; WAVE32-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 +; WAVE32-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 +; WAVE32-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 +; WAVE32-NEXT: 
buffer_load_dword v222, off, s[0:3], s33 offset:68 +; WAVE32-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 +; WAVE32-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 +; WAVE32-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 +; WAVE32-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 +; WAVE32-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 +; WAVE32-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 +; WAVE32-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 +; WAVE32-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 +; WAVE32-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 +; WAVE32-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 +; WAVE32-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 +; WAVE32-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 +; WAVE32-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 +; WAVE32-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 +; WAVE32-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 +; WAVE32-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 +; WAVE32-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 +; WAVE32-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 +; WAVE32-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 +; WAVE32-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 +; WAVE32-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 +; WAVE32-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 +; WAVE32-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 +; WAVE32-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 +; WAVE32-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 +; WAVE32-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 +; WAVE32-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 +; WAVE32-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 +; WAVE32-NEXT: buffer_load_dword v169, off, 
s[0:3], s33 offset:184 +; WAVE32-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 +; WAVE32-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 +; WAVE32-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 +; WAVE32-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 +; WAVE32-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 +; WAVE32-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 +; WAVE32-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 +; WAVE32-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 +; WAVE32-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 +; WAVE32-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 +; WAVE32-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 +; WAVE32-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 +; WAVE32-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 +; WAVE32-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 +; WAVE32-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 +; WAVE32-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 +; WAVE32-NEXT: s_clause 0x30 +; WAVE32-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 +; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 +; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 +; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 +; WAVE32-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 +; WAVE32-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 +; WAVE32-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 +; WAVE32-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 +; WAVE32-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 +; WAVE32-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 +; WAVE32-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 +; WAVE32-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 +; WAVE32-NEXT: buffer_load_dword v108, 
off, s[0:3], s33 offset:300 +; WAVE32-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 +; WAVE32-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 +; WAVE32-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 +; WAVE32-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 +; WAVE32-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 +; WAVE32-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 +; WAVE32-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 +; WAVE32-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 +; WAVE32-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 +; WAVE32-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 +; WAVE32-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 +; WAVE32-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 +; WAVE32-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 +; WAVE32-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 +; WAVE32-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 +; WAVE32-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 +; WAVE32-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 +; WAVE32-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 +; WAVE32-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 +; WAVE32-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 +; WAVE32-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 +; WAVE32-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 +; WAVE32-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 +; WAVE32-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 +; WAVE32-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 +; WAVE32-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 +; WAVE32-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 +; WAVE32-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 +; WAVE32-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 +; WAVE32-NEXT: 
buffer_load_dword v46, off, s[0:3], s33 offset:420 +; WAVE32-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 +; WAVE32-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 +; WAVE32-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 +; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 +; WAVE32-NEXT: s_addk_i32 s32, 0x3880 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 s33, s40 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber nonpreserved SGPRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + 
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"() + ret void +} + +declare hidden void @ex() #0 + +define hidden void @func_call_clobber() #0 { +; GFX900-LABEL: func_call_clobber: +; GFX900: .Lfunc_begin3: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s16, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[18:19] +; GFX900-NEXT: v_writelane_b32 v40, s16, 2 +; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v40, s30, 0 +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: v_writelane_b32 v40, s31, 1 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s31, v40, 1 +; GFX900-NEXT: v_readlane_b32 s30, 
v40, 0 +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: v_readlane_b32 s4, v40, 2 +; GFX900-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: s_mov_b32 s33, s4 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_call_clobber: +; GFX90A-V2A-DIS: .Lfunc_begin3: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-DIS-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-DIS-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-DIS-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-DIS-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s4 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_call_clobber: +; GFX90A-V2A-EN: .Lfunc_begin3: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-EN-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-EN-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-EN-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-EN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-EN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s4 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_call_clobber: +; WAVE32: .Lfunc_begin3: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s16, s33 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: s_or_saveexec_b32 s17, -1 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s17 +; WAVE32-NEXT: v_writelane_b32 v40, s16, 2 +; WAVE32-NEXT: s_addk_i32 s32, 0x200 +; WAVE32-NEXT: s_getpc_b64 s[16:17] +; WAVE32-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; WAVE32-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 +; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 +; WAVE32-NEXT: s_swappc_b64 
s[30:31], s[16:17] +; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 +; WAVE32-NEXT: v_readlane_b32 s30, v40, 0 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: v_readlane_b32 s4, v40, 2 +; WAVE32-NEXT: s_or_saveexec_b32 s5, -1 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s5 +; WAVE32-NEXT: s_mov_b32 s33, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void @ex() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_vmem() #0 { +; GFX900-LABEL: func_spill_vgpr_to_vmem: +; GFX900: .Lfunc_begin4: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-DIS: .Lfunc_begin4: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 
4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-EN: .Lfunc_begin4: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; 
Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_vmem: +; WAVE32: .Lfunc_begin4: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void asm sideeffect "; clobber", "~{v40}"() #0 + call void asm sideeffect "; clobber", "~{v41}"() #0 + call void asm sideeffect "; clobber", "~{a32}"() #0 + call void asm sideeffect "; clobber", "~{a33}"() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_agpr() #2 { +; GFX900-LABEL: func_spill_vgpr_to_agpr: +; GFX900: .Lfunc_begin5: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-DIS: .Lfunc_begin5: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-EN: .Lfunc_begin5: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_agpr: +; WAVE32: .Lfunc_begin5: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber", "~{v40}"() + call void asm sideeffect "; clobber", "~{v41}"() + call void asm sideeffect "; 
clobber", "~{a32}"() + call void asm sideeffect "; clobber", "~{a33}"() + ret void +} + + +; NOTE: Number of VGPRs available to kernel, and in turn number of corresponding CFIs generated, +; is dependent on waves/WG size. Since the intent here is to check whether we generate the correct +; CFIs, doing it for any one set of details is sufficient which also makes the test insensitive to +; changes in those details. +attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" "frame-pointer"="all" } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index 49a91e6f6f33b..dafd6cce2d878 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -445,6 +445,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -453,17 +455,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal ; 
MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -485,6 +493,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; 
MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc @@ -493,17 +503,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -525,6 +541,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -533,17 +551,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -567,6 +591,8 @@ body: | ; MUBUFW64-LABEL: name: 
s_add_i32__kernel__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc @@ -575,6 +601,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc @@ -583,12 +611,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = 
S_ADD_I32 $sgpr8, 96, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc @@ -613,6 +645,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc @@ -621,6 +655,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc @@ -629,12 +665,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; 
FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def dead $scc @@ -658,6 +698,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc @@ -666,6 +708,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc @@ -674,12 +718,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: 
renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc @@ -750,6 +798,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc @@ -758,6 +808,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc @@ -766,12 +818,16 @@ body: | ; FLATSCRW64-LABEL: name: 
s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc @@ -911,6 +967,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc @@ -919,17 +977,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -952,6 +1016,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc @@ -960,17 +1026,23 @@ body: | ; MUBUFW32-LABEL: name: 
s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -1198,6 +1270,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1206,6 +1280,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1214,12 +1290,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc @@ -1244,6 +1324,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 
0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1252,6 +1334,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1260,12 +1344,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc @@ -1291,6 +1379,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; MUBUFW64: liveins: $sgpr8, 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1299,6 +1389,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1307,12 +1399,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; 
FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc @@ -1338,6 +1434,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1346,6 +1444,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1354,12 +1454,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir index af61bd70f16b6..442018d21734a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir @@ -58,6 +58,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -66,17 +68,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: 
s_add_u32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -98,6 +106,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc @@ -106,17 +116,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead 
$scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir index 348743644ce4f..2a4b305f32cef 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir @@ -274,11 +274,15 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = 
V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -337,12 +341,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -366,12 +374,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 %stack.0, renamable $vgpr1, 0, implicit $exec @@ -396,11 +408,15 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -424,11 +440,15 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: 
v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -453,12 +473,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index ade7b4266e9e6..ae53a3696fc2b 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -890,13 +890,17 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 
0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -918,6 +922,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec @@ -925,7 +931,9 @@ body: | ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; 
FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def $vcc, implicit $exec @@ -949,13 +957,17 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -978,6 +990,8 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX7: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, 
implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -987,6 +1001,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -996,6 +1012,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX900: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1005,6 +1023,8 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1014,22 +1034,30 @@ body: | 
; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX942: $sgpr4 = S_MOV_B32 72 + ; GFX942: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX942-NEXT: $sgpr4 = S_MOV_B32 72 ; GFX942-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX11: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; GFX11: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX11-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX12: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; GFX12: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX12-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead 
$vcc = V_ADD_CO_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -1240,6 +1268,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1248,6 +1278,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_CO_U32_e32 renamable $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec @@ -1271,6 +1303,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec @@ -1279,6 
+1313,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -1302,6 +1338,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec @@ -1310,6 +1348,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 %stack.0, renamable $vgpr1, 0, implicit $exec @@ -1334,6 +1374,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel ; 
MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1341,6 +1383,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1363,6 +1407,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1370,6 +1416,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = 
V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1393,6 +1441,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1400,6 +1450,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1423,6 +1475,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1430,6 +1484,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; 
FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1452,6 +1508,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1459,6 +1517,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1481,6 +1541,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec @@ -1489,6 +1551,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def $vcc, implicit $exec @@ -1514,6 +1578,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1522,6 +1588,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec @@ -1548,6 +1616,8 @@ body: | ; MUBUFW64-LABEL: name: 
v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1556,6 +1626,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec @@ -1581,6 +1653,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1589,6 +1663,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW64: liveins: 
$vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1614,6 +1690,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1622,6 +1700,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1647,6 +1727,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1655,6 +1737,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.1, 0, implicit $exec @@ -1679,6 +1763,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel ; MUBUFW64: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec @@ -1687,6 +1773,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel ; FLATSCRW64: liveins: $sgpr4 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable 
$vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1754,6 +1842,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1761,6 +1851,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1814,6 +1906,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel_live_co ; MUBUFW64: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 0, killed $sgpr4, 0, implicit $exec @@ -1822,6 +1916,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel_live_co ; FLATSCRW64: liveins: $sgpr4 ; 
FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 0, killed $sgpr4, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr4_sgpr5 renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1846,6 +1942,8 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1858,6 +1956,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1870,6 +1970,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, 
$sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1882,6 +1984,8 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1894,6 +1998,8 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -1905,6 +2011,8 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX942: 
liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec @@ -1914,6 +2022,8 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -1922,6 +2032,8 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -1950,6 +2062,8 @@ body: | ; GFX7-LABEL: name: 
v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1962,6 +2076,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1974,6 +2090,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1986,6 +2104,8 @@ 
body: | ; GFX90A-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1998,6 +2118,8 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -2009,6 +2131,8 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec @@ -2018,6 +2142,8 @@ body: | ; 
GFX11-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -2026,6 +2152,8 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir index 6a4671058dc0e..c5c9696ee355a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir @@ -622,6 +622,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; MUBUF: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -631,6 +633,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; MUBUFW32: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec @@ -639,6 +643,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -646,6 +652,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 %stack.1, $vgpr8, 1, implicit $exec @@ 
-668,6 +676,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec @@ -676,17 +686,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: 
renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.0, implicit $exec SI_RETURN implicit $vgpr0 @@ -708,6 +724,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec @@ -716,17 +734,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 12, 
implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -751,6 +775,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -759,17 +785,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 
44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.1, implicit $exec SI_RETURN implicit $vgpr0 @@ -792,6 +824,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -800,17 +834,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -833,6 +873,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -841,17 +883,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; 
MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 %stack.1, 12, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -874,6 +922,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec @@ -882,17 +932,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp - ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp - ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -917,6 +973,8 @@ body: | ; MUBUF-LABEL: name: killed_reg_regression ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec @@ -930,6 +988,8 @@ body: | ; MUBUFW32-LABEL: name: killed_reg_regression ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec @@ -943,6 +1003,8 @@ body: | ; FLATSCRW64-LABEL: name: killed_reg_regression ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec @@ -954,6 +1016,8 @@ body: | ; FLATSCRW32-LABEL: name: killed_reg_regression ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec @@ -987,6 +1051,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def 
dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -995,6 +1061,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1003,12 +1071,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e32 renamable $vgpr1, %stack.0, implicit $exec @@ -1032,6 +1104,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 
0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1040,6 +1114,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1048,12 +1124,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, renamable $vgpr1, 
implicit $exec @@ -1077,6 +1157,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec @@ -1085,6 +1167,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec @@ -1093,12 +1177,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.0, implicit $exec @@ -1122,6 +1210,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1130,6 +1220,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1138,12 +1230,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; 
FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -1168,6 +1264,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1177,6 +1275,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1186,6 +1286,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 
+ ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1193,6 +1295,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1218,6 +1322,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1227,6 +1333,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; 
MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec @@ -1235,6 +1343,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1242,6 +1352,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 renamable $vgpr0 = V_ADD_U32_e64 renamable $sgpr8, %stack.1, 0, implicit $exec @@ -1266,6 +1378,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1273,6 +1387,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1280,11 +1396,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1307,6 +1427,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1314,6 +1436,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 
0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1321,11 +1445,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, $vgpr0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1349,6 +1477,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1356,6 +1486,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1363,11 +1495,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1391,6 +1527,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1398,6 +1536,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1405,11 +1545,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1432,6 +1576,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1439,6 +1585,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1446,11 +1594,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, killed $vgpr0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1475,6 +1627,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1483,6 +1637,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1491,12 +1647,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec @@ -1523,6 +1683,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ 
-1531,6 +1693,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1539,12 +1703,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec @@ -1570,6 +1738,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 
; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1578,6 +1748,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1586,12 +1758,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec @@ -1617,6 +1793,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 
0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1625,6 +1803,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1633,12 +1813,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec @@ -1664,6 +1848,8 @@ 
body: | ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1672,6 +1858,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1680,12 +1868,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir new file mode 100644 index 0000000000000..dd2503502211f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=prologepilog -o - %s | FileCheck %s + +--- | + + define protected amdgpu_kernel void @kern1() { + entry: + ret void + } +... +--- +name: kern1 +alignment: 1 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + isEntryFunction: true + scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + workGroupIDX: { reg: '$sgpr0' } + privateSegmentWaveByteOffset: { reg: '$sgpr1' } + workItemIDX: { reg: '$vgpr0' } +body: | + bb.0: + ; CHECK-LABEL: name: kern1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: S_ENDPGM 0 + S_ENDPGM 0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir index 17ec6f5b37241..e861a15981186 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir @@ -21,6 +21,8 @@ body: | ; GFX11-LABEL: name: tied_operand_test ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = SCRATCH_LOAD_SHORT_D16_HI_ST 0, 0, killed renamable $vgpr0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr0_sgpr1, 4, 0 diff --git a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir index 4d8fb8db624f8..2872cfd212273 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir @@ -19,6 +19,8 @@ body: | ; CHECK-LABEL: name: av_mov_b32_split ; CHECK: liveins: $agpr3, $agpr4, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec ; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec @@ -68,6 +70,8 @@ body: | ; CHECK-LABEL: name: v_mov_b32_split ; CHECK: liveins: $agpr3, $agpr4, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, 
implicit $exec ; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec @@ -120,6 +124,8 @@ body: | ; CHECK-LABEL: name: av_mov_b64_split ; CHECK: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr0_agpr1 ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr0_agpr1 ; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec, implicit-def $agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir index 786ce40203836..e44736584767b 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir @@ -14,6 +14,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0, $vgpr2 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir index 86b6c5982b4cb..a244a433a4efb 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir @@ -19,6 +19,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) 
; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -67,6 +69,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -115,6 +119,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -164,6 +170,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -215,6 +223,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit 
killed $scc ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir index 7a913cf50ea2b..f96c3c56896c0 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir +++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir @@ -31,6 +31,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $sgpr33 = S_MOV_B32 0 ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 4d23fb116cd03..294d8bbbeba63 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -22,6 +22,8 @@ entry: ; GCN-LABEL: {{^}}only_undef_dbg_value: ; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef +; NOOPT-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; NOOPT-NEXT: .cfi_undefined 16 ; NOOPT-NEXT: s_endpgm ; OPT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index 2aae26b9470a8..34dd69f966637 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -33,6 +33,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX908: bb.0 (%ir-block.0): ; PEI-GFX908-NEXT: liveins: $agpr4, $sgpr4_sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9 ; PEI-GFX908-NEXT: {{ $}} + ; PEI-GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-GFX908-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX908-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; PEI-GFX908-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 @@ -79,6 +81,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX90A: bb.0 (%ir-block.0): ; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; PEI-GFX90A-NEXT: {{ $}} + ; PEI-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:VReg_128_Align2 */, def 
renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir index aa4428f3da4eb..8027373123d61 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -27,6 +27,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr4 = S_MOV_B32 524288 diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir index e4cbae66d47fa..7f12571a6bdb4 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir @@ -36,6 +36,8 @@ body: | ; GFX908-LABEL: name: regalloc_introduces_s_to_a_copy ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, $vgpr32_vgpr33_vgpr34_vgpr35, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir index 520717391b596..2f6c628d290ea 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir @@ -59,6 +59,8 @@ body: | ; PEI: bb.0: ; PEI-NEXT: successors: %bb.1(0x80000000) ; PEI-NEXT: {{ $}} + ; PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-NEXT: renamable $sgpr10 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir index ba2e80fdc04c8..92c4249b26069 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -58,6 +58,8 @@ body: | ; GCN64-MUBUF-LABEL: name: check_spill ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN64-MUBUF-NEXT: {{ $}} + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; GCN64-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-MUBUF-NEXT: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 @@ -222,6 +224,8 @@ body: | ; GCN32-MUBUF-LABEL: name: check_spill ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN32-MUBUF-NEXT: {{ $}} + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN32-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN32-MUBUF-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -386,6 +390,8 @@ body: | ; GCN64-FLATSCR-LABEL: name: check_spill ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 ; GCN64-FLATSCR-NEXT: {{ $}} + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc @@ -617,6 +623,8 @@ body: | ; GCN64-MUBUF-LABEL: name: check_reload ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN64-MUBUF-NEXT: {{ $}} + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-MUBUF-NEXT: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 
@@ -755,6 +763,8 @@ body: | ; GCN32-MUBUF-LABEL: name: check_reload ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN32-MUBUF-NEXT: {{ $}} + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN32-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN32-MUBUF-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -893,6 +903,8 @@ body: | ; GCN64-FLATSCR-LABEL: name: check_reload ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 ; GCN64-FLATSCR-NEXT: {{ $}} + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir index 639bf6a6d550c..3531b3dd75792 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -40,6 +40,8 @@ body: | ; GFX9-LABEL: name: check_vcc ; GFX9: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX9-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX9-NEXT: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 @@ -77,6 +79,8 @@ body: | ; GFX10-LABEL: name: check_vcc ; GFX10: liveins: $sgpr8, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX10-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -114,6 +118,8 @@ body: | ; GFX11-LABEL: name: check_vcc ; GFX11: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index 0cf26be3ac24f..42386385a8016 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -7,6 +7,8 @@ define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 { ; GCN: bb.0.entry: ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 entry: ret float %a @@ -18,6 +20,8 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GCN-NEXT: liveins: $sgpr2, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc ; GCN-NEXT: {{ $}} @@ -51,6 +55,8 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; GCN-NEXT: {{ $}} @@ -103,6 +109,8 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(floa ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN-NEXT: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec From ee97ce9976bd3abb865771698db701aed99c39e4 Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Wed, 25 Jun 2025 10:04:58 -0400 Subject: [PATCH 4/5] [AMDGPU] Implement CFI for non-kernel functions This does not implement CSR spills other than those AMDGPU handles during PEI. The remaining spills are handled in a subsequent patch. 
Co-authored-by: Scott Linder Co-authored-by: Venkata Ramanaiah Nalamothu --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 304 +- llvm/lib/Target/AMDGPU/SIFrameLowering.h | 30 +- .../GlobalISel/call-outgoing-stack-args.ll | 8 +- .../GlobalISel/dynamic-alloca-uniform.ll | 6 +- .../CodeGen/AMDGPU/GlobalISel/localizer.ll | 2 +- .../AMDGPU/GlobalISel/non-entry-alloca.ll | 4 +- .../AMDGPU/accvgpr-spill-scc-clobber.mir | 5568 +++++++++++++++++ .../CodeGen/AMDGPU/agpr-copy-reuse-writes.mir | 24 + llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir | 48 + .../CodeGen/AMDGPU/amdgcn-call-whole-wave.ll | 286 +- .../CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll | 24 + .../test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll | 28 +- .../AMDGPU/av_spill_cross_bb_usage.mir | 465 ++ .../CodeGen/AMDGPU/bug-undef-spilled-agpr.mir | 6 + llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 48 +- .../CodeGen/AMDGPU/call-argument-types.ll | 32 +- .../callee-special-input-vgprs-packed.ll | 14 +- .../AMDGPU/callee-special-input-vgprs.ll | 12 +- .../AMDGPU/csr-sgpr-spill-live-ins.mir | 6 + llvm/test/CodeGen/AMDGPU/debug-frame.ll | 1672 ++++- .../AMDGPU/dwarf-multi-register-use-crash.ll | 468 ++ .../dynamic-vgpr-reserve-stack-for-cwsr.ll | 4 +- .../test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 243 +- .../eliminate-frame-index-s-add-i32.mir | 384 +- .../eliminate-frame-index-s-mov-b32.mir | 2196 +++++++ .../eliminate-frame-index-scalar-bit-ops.mir | 76 +- ...minate-frame-index-v-add-co-u32-wave32.mir | 76 +- .../eliminate-frame-index-v-add-co-u32.mir | 582 +- .../eliminate-frame-index-v-add-u32.mir | 229 +- llvm/test/CodeGen/AMDGPU/frame-index.mir | 376 +- .../CodeGen/AMDGPU/function-args-inreg.ll | 19 +- .../AMDGPU/gfx-callable-argument-types.ll | 1166 ++-- .../gfx-callable-preserved-registers.ll | 68 +- .../AMDGPU/gfx-callable-return-types.ll | 24 +- .../CodeGen/AMDGPU/insert-waitcnts-crash.ll | 220 +- ...egrewriter-live-out-undef-subregisters.mir | 209 + .../local-stack-alloc-block-sp-reference.ll | 10 +- 
llvm/test/CodeGen/AMDGPU/nested-calls.ll | 4 +- .../AMDGPU/no-source-locations-in-prologue.ll | 212 + llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 8 +- .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir | 687 ++ .../CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir | 568 ++ .../CodeGen/AMDGPU/pei-build-av-spill.mir | 3096 ++++++++- .../AMDGPU/pei-build-spill-partial-agpr.mir | 116 + llvm/test/CodeGen/AMDGPU/pei-build-spill.mir | 2280 ++++++- .../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 127 + .../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 66 + .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 33 + .../AMDGPU/pei-scavenge-vgpr-spill.mir | 210 + .../AMDGPU/pei-vgpr-block-spill-csr.mir | 622 +- .../AMDGPU/preserve-only-inactive-lane.mir | 3 + .../AMDGPU/preserve-wwm-copy-dst-reg.ll | 6 +- .../AMDGPU/prologue-epilogue-markers.ll | 2 + llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll | 12 + .../CodeGen/AMDGPU/same-slot-agpr-sgpr.mir | 16 + .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 51 + .../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir | 395 ++ .../AMDGPU/sgpr-spill-vmem-large-frame.mir | 3 + .../si-optimize-vgpr-live-range-dbg-instr.ll | 9 + llvm/test/CodeGen/AMDGPU/sibling-call.ll | 6 +- .../AMDGPU/spill-agpr-partially-undef.mir | 14 +- llvm/test/CodeGen/AMDGPU/spill-agpr.mir | 538 ++ .../AMDGPU/spill-reg-tuple-super-reg-use.mir | 24 + .../AMDGPU/spill-sgpr-used-for-exec-copy.mir | 14 + .../CodeGen/AMDGPU/spill-to-agpr-partial.mir | 56 + llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir | 9 + llvm/test/CodeGen/AMDGPU/spillv16.mir | 3 + .../CodeGen/AMDGPU/split-arg-dbg-value.ll | 18 + llvm/test/CodeGen/AMDGPU/stack-realign.ll | 26 +- .../AMDGPU/strictfp_f16_abi_promote.ll | 4 +- .../CodeGen/AMDGPU/swdev504645-global-fold.ll | 2 +- .../AMDGPU/tail-call-inreg-arguments.error.ll | 2 +- ...d-op-for-wwm-scratch-reg-spill-restore.mir | 21 + .../AMDGPU/track-spilled-vgpr-liveness.mir | 9 + .../AMDGPU/tuple-allocation-failure.ll | 2 +- ...unfold-masked-merge-scalar-variablemask.ll | 6 +- 
.../CodeGen/AMDGPU/use_restore_frame_reg.mir | 66 + ...tor-spill-restore-to-other-vector-type.mir | 64 + .../CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir | 2136 +++++++ llvm/test/CodeGen/AMDGPU/vgpr-spill.mir | 22 +- .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 30 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 8 +- .../AMDGPU/whole-wave-functions-pei.mir | 63 + .../CodeGen/AMDGPU/whole-wave-functions.ll | 4473 ++++++++++++- llvm/test/DebugInfo/AMDGPU/cfi.ll | 3 + llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll | 12 + ...dgpu_generated_funcs.ll.generated.expected | 27 + ...pu_generated_funcs.ll.nogenerated.expected | 27 + 88 files changed, 29857 insertions(+), 1291 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 71356aa2aced1..5a0b1afbdfdff 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/LEB128.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -29,6 +30,10 @@ static cl::opt EnableSpillVGPRToAGPR( cl::ReallyHidden, cl::init(true)); +static constexpr unsigned SGPRBitSize = 32; +static constexpr unsigned SGPRByteSize = SGPRBitSize / 8; +static constexpr unsigned VGPRLaneBitSize = 32; + // Find a register matching \p RC from \p LiveUnits which is unused and // available throughout the function. On failure, returns AMDGPU::NoRegister. 
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of @@ -54,6 +59,72 @@ static bool needsFrameMoves(const MachineFunction &MF) { return true; } +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + assert(DwarfReg >= 0); + if (DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + +static MCCFIInstruction +createScaledCFAInPrivateWave(const GCNSubtarget &ST, + MCRegister DwarfStackPtrReg) { + assert(ST.enableFlatScratch()); + + // When flat scratch is enabled, the stack pointer is an address in the + // private_lane DWARF address space (i.e. swizzled), but in order to + // accurately and efficiently describe things like masked spills of vector + // registers we want to define the CFA to be an address in the private_wave + // DWARF address space (i.e. unswizzled). To achieve this we scale the stack + // pointer by the wavefront size, implemented as (SP << wave_size_log2). 
+ const unsigned WavefrontSizeLog2 = ST.getWavefrontSizeLog2(); + assert(WavefrontSizeLog2 < 32); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(DwarfStackPtrReg, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size) << uint8_t(SGPRByteSize) + << uint8_t(dwarf::DW_OP_lit0 + WavefrontSizeLog2) + << uint8_t(dwarf::DW_OP_shl) + << uint8_t(dwarf::DW_OP_lit0 + + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave) + << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_form_aspace_address); + + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + OSCFIInst << uint8_t(dwarf::DW_CFA_def_cfa_expression); + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + return MCCFIInstruction::createEscape(nullptr, OSCFIInst.str()); +} + +void SIFrameLowering::emitDefCFA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags) const { + MachineFunction &MF = *MBB.getParent(); + const GCNSubtarget &ST = MF.getSubtarget(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); + + MCRegister DwarfStackPtrReg = MCRI->getDwarfRegNum(StackPtrReg, false); + MCCFIInstruction CFIInst = + ST.enableFlatScratch() + ? createScaledCFAInPrivateWave(ST, DwarfStackPtrReg) + : (AspaceAlreadyDefined + ? MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, DwarfStackPtrReg, 0, + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave, SMLoc()) + : MCCFIInstruction::createDefCfaRegister(nullptr, + DwarfStackPtrReg)); + buildCFI(MBB, MBBI, DL, CFIInst, Flags); +} + // Find a scratch register that we can use in the prologue. 
We avoid using // callee-save registers since they may appear to be free when this is called // from canUseAsPrologue (during shrink wrapping), but then no longer be free @@ -242,6 +313,8 @@ class PrologEpilogSGPRSpillBuilder { SIMachineFunctionInfo *FuncInfo; const SIInstrInfo *TII; const SIRegisterInfo &TRI; + const MCRegisterInfo *MCRI; + const SIFrameLowering *TFI; Register SuperReg; const PrologEpilogSGPRSaveRestoreInfo SI; LiveRegUnits &LiveUnits; @@ -250,9 +323,16 @@ class PrologEpilogSGPRSpillBuilder { ArrayRef SplitParts; unsigned NumSubRegs; unsigned EltSize = 4; + bool IsFramePtrPrologSpill; + bool NeedsFrameMoves; + + bool isExec(Register Reg) const { + return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC; + } void saveToMemory(const int FI) const { MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); assert(!MFI.isDeadObjectIndex(FI)); initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); @@ -271,6 +351,20 @@ class PrologEpilogSGPRSpillBuilder { buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, FI, FrameReg, DwordOff); + if (NeedsFrameMoves) { + if (isExec(SuperReg) && (I == NumSubRegs - 1)) + SubReg = AMDGPU::EXEC; + else if (IsFramePtrPrologSpill) + SubReg = FuncInfo->getFrameOffsetReg(); + + // FIXME: CFI for EXEC needs a fix by accurately computing the spill + // offset for both the low and high components. 
+ if (SubReg != AMDGPU::EXEC_LO) + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(SubReg, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); + } DwordOff += 4; } } @@ -292,6 +386,19 @@ class PrologEpilogSGPRSpillBuilder { .addReg(SubReg) .addImm(Spill[I].Lane) .addReg(Spill[I].VGPR, RegState::Undef); + if (NeedsFrameMoves) { + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, AMDGPU::EXEC, Spill); + } else if (IsFramePtrPrologSpill) { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, + FuncInfo->getFrameOffsetReg(), + Spill[I].VGPR, Spill[I].Lane); + } else { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, SubReg, Spill[I].VGPR, + Spill[I].Lane); + } + } } } @@ -299,10 +406,35 @@ class PrologEpilogSGPRSpillBuilder { BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) .addReg(SuperReg) .setMIFlag(MachineInstr::FrameSetup); + if (NeedsFrameMoves) { + const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(DstReg); + ArrayRef DstSplitParts = TRI.getRegSplitParts(RC, EltSize); + unsigned DstNumSubRegs = DstSplitParts.empty() ? 1 : DstSplitParts.size(); + assert(NumSubRegs == DstNumSubRegs); + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SrcSubReg = + NumSubRegs == 1 ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + Register DstSubReg = + NumSubRegs == 1 ? 
DstReg + : Register(TRI.getSubReg(DstReg, DstSplitParts[I])); + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForRegToSGPRPairSpill(MBB, MI, DL, AMDGPU::EXEC, + DstReg); + } else { + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createRegister( + nullptr, MCRI->getDwarfRegNum(SrcSubReg, false), + MCRI->getDwarfRegNum(DstSubReg, false))); + } + } + } } void restoreFromMemory(const int FI) { MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( @@ -354,12 +486,15 @@ class PrologEpilogSGPRSpillBuilder { MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, - LiveRegUnits &LiveUnits, Register FrameReg) + LiveRegUnits &LiveUnits, Register FrameReg, + bool IsFramePtrPrologSpill = false) : MI(MI), MBB(MBB), MF(*MBB.getParent()), ST(MF.getSubtarget()), MFI(MF.getFrameInfo()), FuncInfo(MF.getInfo()), TII(TII), TRI(TRI), - SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), - FrameReg(FrameReg) { + MCRI(MF.getContext().getRegisterInfo()), TFI(ST.getFrameLowering()), + SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), FrameReg(FrameReg), + IsFramePtrPrologSpill(IsFramePtrPrologSpill), + NeedsFrameMoves(needsFrameMoves(MF)) { const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg); SplitParts = TRI.getRegSplitParts(RC, EltSize); NumSubRegs = SplitParts.empty() ? 
1 : SplitParts.size(); @@ -967,6 +1102,50 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { llvm_unreachable("Invalid TargetStackID::Value"); } +void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); + Register StackPtrReg = + MF.getInfo()->getStackPtrOffsetReg(); + + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/true, + MachineInstr::FrameSetup); + + buildCFIForRegToSGPRPairSpill(MBB, MBBI, DL, AMDGPU::PC_REG, + TRI.getReturnAddressReg(MF)); + + BitVector IsCalleeSaved(TRI.getNumRegs()); + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + for (unsigned I = 0; CSRegs[I]; ++I) { + IsCalleeSaved.set(CSRegs[I]); + } + auto ProcessReg = [&](MCPhysReg Reg) { + if (IsCalleeSaved.test(Reg) || !MRI.isPhysRegModified(Reg)) + return; + MCRegister DwarfReg = MCRI->getDwarfRegNum(Reg, false); + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createUndefined(nullptr, DwarfReg)); + }; + + // Emit CFI rules for caller saved Arch VGPRs which are clobbered + unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256; + for_each(AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs), + ProcessReg); + + // Emit CFI rules for caller saved Accum VGPRs which are clobbered + if (ST.hasMAIInsts()) { + for_each(AMDGPU::AGPR_32RegClass.getRegisters(), ProcessReg); + } + + // Emit CFI rules for caller saved SGPRs which are clobbered + for_each(AMDGPU::SGPR_32RegClass.getRegisters(), ProcessReg); +} + // Activate only the inactive lanes when \p EnableInactiveLanes is true. // Otherwise, activate all lanes. It returns the saved exec. 
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, @@ -1013,14 +1192,19 @@ static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, return ScratchExecCopy; } -void SIFrameLowering::emitCSRSpillStores( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, - Register FrameReg, Register FramePtrRegScratchCopy) const { +void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc &DL, LiveRegUnits &LiveUnits, + Register FrameReg, + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const { SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST); @@ -1042,6 +1226,12 @@ void SIFrameLowering::emitCSRSpillStores( int FI = Reg.second; buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL, VGPR, FI, FrameReg); + if (NeedsFrameMoves) + // We spill the entire VGPR, so we can get away with just cfi_offset + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(VGPR, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); } }; @@ -1090,13 +1280,13 @@ void SIFrameLowering::emitCSRSpillStores( // Skip if FP is saved to a scratch SGPR, the save has already been emitted. // Otherwise, FP has been moved to a temporary register and spill it // instead. - Register Reg = - Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first; + bool IsFramePtrPrologSpill = Spill.first == FramePtrReg ? true : false; + Register Reg = IsFramePtrPrologSpill ? 
FramePtrRegScratchCopy : Spill.first; if (!Reg) continue; PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI, - LiveUnits, FrameReg); + LiveUnits, FrameReg, IsFramePtrPrologSpill); SB.save(); } @@ -1264,6 +1454,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, uint32_t NumBytes = MFI.getStackSize(); uint32_t RoundedSize = NumBytes; + const bool NeedsFrameMoves = needsFrameMoves(MF); + + if (NeedsFrameMoves) + emitPrologueEntryCFI(MBB, MBBI, DL); + if (TRI.hasStackRealignment(MF)) HasFP = true; @@ -1272,7 +1467,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // Emit the CSR spill stores with SP base register. emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FuncInfo->isChainFunction() ? Register() : StackPtrReg, - FramePtrRegScratchCopy); + FramePtrRegScratchCopy, NeedsFrameMoves); } else { // CSR spill stores will use FP as base register. Register SGPRForFPSaveRestoreCopy = @@ -1286,7 +1481,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, PrologEpilogSGPRSpillBuilder SB( FramePtrReg, FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI, - DL, TII, TRI, LiveUnits, FramePtrReg); + DL, TII, TRI, LiveUnits, FramePtrReg, + /*IsFramePtrPrologSpill*/ true); SB.save(); LiveUnits.addReg(SGPRForFPSaveRestoreCopy); } else { @@ -1333,7 +1529,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // If FP is used, emit the CSR spills with FP base register. 
if (HasFP) { emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg, - FramePtrRegScratchCopy); + FramePtrRegScratchCopy, NeedsFrameMoves); if (FramePtrRegScratchCopy) LiveUnits.removeReg(FramePtrRegScratchCopy); } @@ -1348,6 +1544,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + if (HasFP) { + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, FramePtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameSetup); + } + if (HasFP && RoundedSize != 0) { auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) .addReg(StackPtrReg) @@ -1447,6 +1649,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, FramePtrRegScratchCopy); } + const bool NeedsFrameMoves = needsFrameMoves(MF); + if (hasFP(MF)) { + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameDestroy); + } + if (FPSaved) { // Insert the copy to restore FP. Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy @@ -2257,3 +2466,72 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB, .addCFIIndex(MF.addFrameInst(CFIInst)) .setMIFlag(flag); } + +MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register SGPR, const Register VGPR, + const int Lane) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false); + assert(DwarfSGPR != -1 && DwarfVGPR != -1); + assert(Lane != -1 && "Expected a lane to be present"); + + // Build a CFI instruction that represents a SGPR spilled to a single lane of + // a VGPR. 
+ MCCFIInstruction::VectorRegisterWithLane VR{unsigned(DwarfVGPR), + unsigned(Lane), VGPRLaneBitSize}; + auto CFIInst = + MCCFIInstruction::createLLVMVectorRegisters(nullptr, DwarfSGPR, {VR}); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + assert(DwarfSGPR != -1); + + // Build a CFI instruction that represents a SGPR spilled to multiple lanes of + // multiple VGPRs. + + std::vector VGPRs; + for (SIRegisterInfo::SpilledReg Spill : VGPRSpills) { + int DwarfVGPR = MCRI.getDwarfRegNum(Spill.VGPR, false); + assert(DwarfVGPR != -1); + assert(Spill.hasLane() && "Expected a lane to be present"); + VGPRs.push_back( + {unsigned(DwarfVGPR), unsigned(Spill.Lane), VGPRLaneBitSize}); + } + + auto CFIInst = MCCFIInstruction::createLLVMVectorRegisters(nullptr, DwarfSGPR, + std::move(VGPRs)); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register Reg, const Register SGPRPair) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); + + int SGPR0 = TRI.getSubReg(SGPRPair, AMDGPU::sub0); + int SGPR1 = TRI.getSubReg(SGPRPair, AMDGPU::sub1); + + int DwarfReg = MCRI.getDwarfRegNum(Reg, false); + int DwarfSGPR0 = MCRI.getDwarfRegNum(SGPR0, false); + int DwarfSGPR1 = MCRI.getDwarfRegNum(SGPR1, false); + assert(DwarfReg != -1 && DwarfSGPR0 != -1 && DwarfSGPR1 != -1); + + auto CFIInst = 
MCCFIInstruction::createLLVMRegisterPair( + nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 0b691d8f15a48..20f608f2dfc24 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -39,7 +39,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering { void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, - Register FramePtrRegScratchCopy) const; + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const; void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, @@ -101,6 +102,15 @@ class SIFrameLowering final : public AMDGPUFrameLowering { Register PreloadedPrivateBufferReg, Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const; + void emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const; + + void emitDefCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const; + public: bool requiresStackPointerReference(const MachineFunction &MF) const; @@ -110,6 +120,24 @@ class SIFrameLowering final : public AMDGPUFrameLowering { const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const; + /// Create a CFI index describing a spill of an SGPR to a single lane of + /// a VGPR and build a MachineInstr around it. 
+ MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const Register SGPR, + const Register VGPR, + const int Lane) const; + /// Create a CFI index describing a spill of an SGPR to multiple lanes of + /// VGPRs and build a MachineInstr around it. + MachineInstr *buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const; + MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + Register SGPRPair) const; // Returns true if the function may need to reserve space on the stack for the // CWSR trap handler. bool mayReserveScratchForCWSR(const MachineFunction &MF) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index c16c8e2128c72..e3228162be22a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -222,11 +222,11 @@ define void @func_caller_stack() { ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_mov_b32_e32 v0, 9 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; MUBUF-NEXT: v_mov_b32_e32 v0, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; MUBUF-NEXT: v_mov_b32_e32 v0, 11 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 @@ -257,8 +257,8 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: s_add_i32 s32, 
s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: s_add_u32 s0, s32, 4 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 9 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 @@ -300,10 +300,10 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen ; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4 @@ -382,9 +382,9 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 ; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll index 8cb9a5486a2de..b17324a38ada0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -363,7 +363,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff @@ -377,6 +376,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-NEXT: s_add_u32 s32, s5, s4 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s7 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -394,7 +394,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff @@ -408,6 +407,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX10-NEXT: s_add_u32 s32, s5, s4 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: s_mov_b32 s34, s7 +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32: @@ -424,7 +424,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff @@ -439,6 +438,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX11-NEXT: s_add_u32 s32, s1, s0 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s3 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, ptr addrspace(4) @gv %alloca = alloca i32, i32 %n, align 32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index c295a662704e9..c100d653c1cd7 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -235,11 +235,11 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) { ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-NEXT: v_writelane_b32 v40, s16, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s16, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index 21f459ac033ca..9839af011ecdb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -151,8 +151,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s7, s33 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB2_3 @@ -217,9 +217,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; GCN-NEXT: s_mov_b32 s7, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xfc0 ; GCN-NEXT: s_mov_b32 s8, s34 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_addk_i32 s32, 0x2000 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB3_2 diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir 
b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index c1617574becc3..4ff3f5c13d42a 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -26,6 +26,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -49,6 +369,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -520,6 +984,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -544,6 +1328,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -1044,6 +1972,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1069,6 +2317,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -1541,6 +2933,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -1567,6 +3279,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -2067,6 +3923,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -2094,6 +4270,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -2567,6 +4887,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -2595,6 +5235,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -3095,6 +5879,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -3118,6 +6222,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr0, $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -3589,6 +6837,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -3613,6 +7181,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr0, $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -4112,6 +7824,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -4137,6 +8169,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -4609,6 +8785,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + 
; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -4635,6 +9131,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -5133,6 +9773,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -5160,6 +10120,150 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec @@ -5633,6 +10737,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + 
; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -5661,6 +11085,150 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; 
GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir index 1573903945a3e..7f26e413cf780 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir @@ -11,6 +11,16 @@ body: | ; GFX908-LABEL: name: standard ; GFX908: liveins: $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec @@ -42,6 +52,14 @@ body: | ; GFX908-LABEL: name: src_is_spill ; GFX908: liveins: $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX908-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -66,6 +84,12 @@ body: | ; GFX908-LABEL: name: overlapping_agpr ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr1_agpr2_agpr3_agpr4 ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir index 47d489b7f35ca..6e5f8aceaf169 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir @@ -18,6 +18,54 @@ body: | ; GFX942-LABEL: name: agpr_spill_copy ; GFX942: liveins: $agpr30, $agpr31 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; 
GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GFX942-NEXT: renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF ; GFX942-NEXT: renamable $agpr28_agpr29 = IMPLICIT_DEF ; GFX942-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll index 356bf4b3cac28..eb6482401f764 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll @@ -22,9 +22,9 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 +; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 ; DAGISEL-NEXT: v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1 ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 ; DAGISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, 
good_callee@abs32@lo ; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 @@ -65,9 +65,9 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GISEL-NEXT: scratch_store_b32 off, v41, s33 +; GISEL-NEXT: v_writelane_b32 v42, s30, 0 ; GISEL-NEXT: v_dual_mov_b32 v40, v1 :: v_dual_mov_b32 v41, v2 ; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; GISEL-NEXT: v_writelane_b32 v42, s30, 0 ; GISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi ; GISEL-NEXT: s_add_co_i32 s32, s32, 16 @@ -138,152 +138,291 @@ define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL-NEXT: ; meta instruction ; 
DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: ; meta instruction ; 
DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: ; 
meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 @@ -453,152 +592,291 @@ define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, ; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, 
v211, s32 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 @@ -779,10 +1057,10 @@ define amdgpu_gfx void @ret_void(i32 %x) { ; 
DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 2 +; DAGISEL-NEXT: v_writelane_b32 v40, s30, 0 ; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo ; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 -; DAGISEL-NEXT: v_writelane_b32 v40, s30, 0 ; DAGISEL-NEXT: v_writelane_b32 v40, s31, 1 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -814,10 +1092,10 @@ define amdgpu_gfx void @ret_void(i32 %x) { ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_mov_b32 exec_lo, s1 ; GISEL-NEXT: v_writelane_b32 v40, s0, 2 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 ; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi ; GISEL-NEXT: s_add_co_i32 s32, s32, 16 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index 74552a500ac51..d821801677d79 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -10668,8 +10668,11 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v16, s32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v17, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v18, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v16, s30, 0 @@ -48648,8 +48651,11 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; 
GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v75, s30, 0 @@ -84633,8 +84639,11 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v16, s32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v17, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v18, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v16, s30, 0 @@ -120601,8 +120610,11 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:92 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v76, s30, 0 @@ -172196,8 +172208,11 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:8 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 @@ -173759,8 +173774,11 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 @@ -196717,8 +196735,11 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v75, s30, 0 @@ -219070,8 +219091,11 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v75, s30, 0 diff 
--git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 2889f37a65d97..7f6bb85827d31 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -33,19 +33,21 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo ; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi -; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX11-NEXT: s_endpgm ; ; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 @@ -58,26 +60,27 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo ; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm ; ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10 ; DAGISEL-GFX11-NEXT: 
v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo -; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX11-NEXT: s_endpgm ; ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 @@ -90,7 +93,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr) @@ -102,7 +104,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24 @@ -123,6 +125,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25 ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: 
v_dual_mov_b32 v33, v9 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 @@ -162,6 +165,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10 @@ -170,7 +174,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15 -; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 @@ -230,7 +233,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24 @@ -251,6 +254,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25 ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: 
v_dual_mov_b32 v35, v12 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 @@ -290,6 +294,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13 @@ -298,7 +303,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8 -; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 @@ -361,10 +365,10 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX11-LABEL: alloca_and_call: ; GISEL-GFX11: ; %bb.0: ; %.entry ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo ; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi -; GISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -373,6 +377,7 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX10-LABEL: alloca_and_call: ; GISEL-GFX10: ; %bb.0: ; %.entry ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo 
@@ -380,17 +385,16 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm ; ; DAGISEL-GFX11-LABEL: alloca_and_call: ; DAGISEL-GFX11: ; %bb.0: ; %.entry ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 ; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo -; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -399,6 +403,7 @@ define amdgpu_cs_chain void @alloca_and_call() { ; DAGISEL-GFX10-LABEL: alloca_and_call: ; DAGISEL-GFX10: ; %bb.0: ; %.entry ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi @@ -406,7 +411,6 @@ define amdgpu_cs_chain void @alloca_and_call() { ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0 -; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm .entry: diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index a2ec87053a8d5..4f1a6cb2c48d8 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -27,8 +27,473 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: 
$sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr147 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr112 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr145 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr210 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr243 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 4352 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir index 7336a54ae42db..72b6b9f9ec686 100644 --- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir @@ -19,11 +19,17 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; 
GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $agpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 -1 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr62, 256 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62 diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index d1cede64ce71d..1e04fc1da938f 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -69,11 +69,11 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -129,11 +129,11 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, 
external_void_func_i16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -189,11 +189,11 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -250,11 +250,11 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -311,11 +311,11 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; 
GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -373,11 +373,11 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s16 ; GFX11-NEXT: v_writelane_b32 v40, s3, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -436,11 +436,11 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s17 ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -503,11 +503,11 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s21 ; GFX11-NEXT: v_writelane_b32 v40, s20, 2 +; GFX11-NEXT: v_writelane_b32 v40, 
s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[20:21] ; GFX11-NEXT: s_add_u32 s20, s20, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s21, s21, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[20:21] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -563,11 +563,11 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -623,11 +623,11 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -683,11 +683,11 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 
v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -744,11 +744,11 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -804,11 +804,11 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -865,11 +865,11 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: 
s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -926,11 +926,11 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -987,11 +987,11 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1048,11 +1048,11 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; GFX11-NEXT: 
scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p0_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p0_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1109,11 +1109,11 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1169,11 +1169,11 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p3_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p3_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1232,11 +1232,11 @@ define void 
@test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s17 ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v2p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v2p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1293,11 +1293,11 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2p5_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2p5_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1357,11 +1357,11 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s18 ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1429,11 +1429,11 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s26 ; GFX11-NEXT: v_writelane_b32 v40, s25, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[26:27] ; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1504,11 +1504,11 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s18 ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 8e12e7e03947b..2f2d2005ea2ae 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -5852,6 +5852,7 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] 
+; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: s_addk_i32 s32, 0x400 ; VI-NEXT: v_mov_b32_e32 v0, 11 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -5860,7 +5861,6 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; VI-NEXT: v_mov_b32_e32 v0, 14 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 15 ; VI-NEXT: v_writelane_b32 v40, s30, 0 @@ -5920,6 +5920,7 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 11 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -5928,7 +5929,6 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 14 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 15 ; CI-NEXT: v_writelane_b32 v40, s30, 0 @@ -5988,6 +5988,7 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 11 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -5996,7 +5997,6 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -6106,6 +6106,7 @@ define void 
@stack_12xv3i32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 11 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6114,7 +6115,6 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 14 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 15 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 @@ -6191,6 +6191,7 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: s_addk_i32 s32, 0x400 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6199,7 +6200,6 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; VI-NEXT: v_writelane_b32 v40, s30, 0 @@ -6259,6 +6259,7 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6267,7 +6268,6 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 
0x41600000 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; CI-NEXT: v_writelane_b32 v40, s30, 0 @@ -6327,6 +6327,7 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6335,7 +6336,6 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -6449,6 +6449,7 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6457,7 +6458,6 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 @@ -6534,6 +6534,7 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: s_addk_i32 s32, 0x400 ; 
VI-NEXT: v_mov_b32_e32 v0, 7 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6550,7 +6551,6 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; VI-NEXT: v_mov_b32_e32 v0, 14 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 15 ; VI-NEXT: v_writelane_b32 v40, s30, 0 @@ -6610,6 +6610,7 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 7 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6626,7 +6627,6 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 14 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 15 ; CI-NEXT: v_writelane_b32 v40, s30, 0 @@ -6686,6 +6686,7 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 7 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6702,7 +6703,6 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -6817,6 +6817,7 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 7 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6833,7 +6834,6 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 14 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 15 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 @@ -6906,6 +6906,7 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: s_addk_i32 s32, 0x400 ; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6922,7 +6923,6 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; VI-NEXT: v_writelane_b32 v40, s30, 0 @@ -6982,6 +6982,7 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -6998,7 +6999,6 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; CI-NEXT: v_writelane_b32 v40, s30, 0 @@ -7058,6 +7058,7 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -7074,7 +7075,6 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -7192,6 +7192,7 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -7208,7 +7209,6 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll index fccee3da6d77e..fcc032b51fe58 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -420,8 +420,8 @@ define void 
@func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 @@ -453,8 +453,8 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 @@ -486,8 +486,8 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 @@ -939,8 +939,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[6:7] -; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s4, 2 +; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: s_getpc_b64 s[4:5] ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ 
-1003,8 +1003,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] -; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s4, 2 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: s_getpc_b64 s[4:5] ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -1081,9 +1081,9 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -1396,6 +1396,7 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 @@ -1403,7 +1404,6 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, 
too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index bb2f06bfe83f8..2854bdca76d01 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -265,8 +265,8 @@ define void @func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 @@ -298,8 +298,8 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 @@ -331,8 +331,8 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 @@ -651,8 +651,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, 
s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -729,9 +729,9 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -970,6 +970,7 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 @@ -977,7 +978,6 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index 
6504f48333485..209ac8e811456 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -15,6 +15,12 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll index 40ff6ccf0cb0f..676144e65c10f 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-frame.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -22,6 +22,8 @@ define hidden void @func_no_clobber() #0 { ; CHECK: .Lfunc_begin1: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -33,9 +35,183 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX900: .Lfunc_begin2: ; GFX900-NEXT: .cfi_startproc ; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: .cfi_undefined 2560 +; GFX900-NEXT: .cfi_undefined 2561 +; GFX900-NEXT: .cfi_undefined 2562 +; GFX900-NEXT: .cfi_undefined 2563 +; GFX900-NEXT: .cfi_undefined 2564 +; GFX900-NEXT: .cfi_undefined 2565 +; GFX900-NEXT: .cfi_undefined 2566 +; GFX900-NEXT: .cfi_undefined 2567 +; GFX900-NEXT: .cfi_undefined 2568 +; GFX900-NEXT: .cfi_undefined 2569 +; GFX900-NEXT: .cfi_undefined 2570 +; GFX900-NEXT: 
.cfi_undefined 2571 +; GFX900-NEXT: .cfi_undefined 2572 +; GFX900-NEXT: .cfi_undefined 2573 +; GFX900-NEXT: .cfi_undefined 2574 +; GFX900-NEXT: .cfi_undefined 2575 +; GFX900-NEXT: .cfi_undefined 2576 +; GFX900-NEXT: .cfi_undefined 2577 +; GFX900-NEXT: .cfi_undefined 2578 +; GFX900-NEXT: .cfi_undefined 2579 +; GFX900-NEXT: .cfi_undefined 2580 +; GFX900-NEXT: .cfi_undefined 2581 +; GFX900-NEXT: .cfi_undefined 2582 +; GFX900-NEXT: .cfi_undefined 2583 +; GFX900-NEXT: .cfi_undefined 2584 +; GFX900-NEXT: .cfi_undefined 2585 +; GFX900-NEXT: .cfi_undefined 2586 +; GFX900-NEXT: .cfi_undefined 2587 +; GFX900-NEXT: .cfi_undefined 2588 +; GFX900-NEXT: .cfi_undefined 2589 +; GFX900-NEXT: .cfi_undefined 2590 +; GFX900-NEXT: .cfi_undefined 2591 +; GFX900-NEXT: .cfi_undefined 2592 +; GFX900-NEXT: .cfi_undefined 2593 +; GFX900-NEXT: .cfi_undefined 2594 +; GFX900-NEXT: .cfi_undefined 2595 +; GFX900-NEXT: .cfi_undefined 2596 +; GFX900-NEXT: .cfi_undefined 2597 +; GFX900-NEXT: .cfi_undefined 2598 +; GFX900-NEXT: .cfi_undefined 2599 +; GFX900-NEXT: .cfi_undefined 2608 +; GFX900-NEXT: .cfi_undefined 2609 +; GFX900-NEXT: .cfi_undefined 2610 +; GFX900-NEXT: .cfi_undefined 2611 +; GFX900-NEXT: .cfi_undefined 2612 +; GFX900-NEXT: .cfi_undefined 2613 +; GFX900-NEXT: .cfi_undefined 2614 +; GFX900-NEXT: .cfi_undefined 2615 +; GFX900-NEXT: .cfi_undefined 2624 +; GFX900-NEXT: .cfi_undefined 2625 +; GFX900-NEXT: .cfi_undefined 2626 +; GFX900-NEXT: .cfi_undefined 2627 +; GFX900-NEXT: .cfi_undefined 2628 +; GFX900-NEXT: .cfi_undefined 2629 +; GFX900-NEXT: .cfi_undefined 2630 +; GFX900-NEXT: .cfi_undefined 2631 +; GFX900-NEXT: .cfi_undefined 2640 +; GFX900-NEXT: .cfi_undefined 2641 +; GFX900-NEXT: .cfi_undefined 2642 +; GFX900-NEXT: .cfi_undefined 2643 +; GFX900-NEXT: .cfi_undefined 2644 +; GFX900-NEXT: .cfi_undefined 2645 +; GFX900-NEXT: .cfi_undefined 2646 +; GFX900-NEXT: .cfi_undefined 2647 +; GFX900-NEXT: .cfi_undefined 2656 +; GFX900-NEXT: .cfi_undefined 2657 +; GFX900-NEXT: .cfi_undefined 2658 
+; GFX900-NEXT: .cfi_undefined 2659 +; GFX900-NEXT: .cfi_undefined 2660 +; GFX900-NEXT: .cfi_undefined 2661 +; GFX900-NEXT: .cfi_undefined 2662 +; GFX900-NEXT: .cfi_undefined 2663 +; GFX900-NEXT: .cfi_undefined 2672 +; GFX900-NEXT: .cfi_undefined 2673 +; GFX900-NEXT: .cfi_undefined 2674 +; GFX900-NEXT: .cfi_undefined 2675 +; GFX900-NEXT: .cfi_undefined 2676 +; GFX900-NEXT: .cfi_undefined 2677 +; GFX900-NEXT: .cfi_undefined 2678 +; GFX900-NEXT: .cfi_undefined 2679 +; GFX900-NEXT: .cfi_undefined 2688 +; GFX900-NEXT: .cfi_undefined 2689 +; GFX900-NEXT: .cfi_undefined 2690 +; GFX900-NEXT: .cfi_undefined 2691 +; GFX900-NEXT: .cfi_undefined 2692 +; GFX900-NEXT: .cfi_undefined 2693 +; GFX900-NEXT: .cfi_undefined 2694 +; GFX900-NEXT: .cfi_undefined 2695 +; GFX900-NEXT: .cfi_undefined 2704 +; GFX900-NEXT: .cfi_undefined 2705 +; GFX900-NEXT: .cfi_undefined 2706 +; GFX900-NEXT: .cfi_undefined 2707 +; GFX900-NEXT: .cfi_undefined 2708 +; GFX900-NEXT: .cfi_undefined 2709 +; GFX900-NEXT: .cfi_undefined 2710 +; GFX900-NEXT: .cfi_undefined 2711 +; GFX900-NEXT: .cfi_undefined 2720 +; GFX900-NEXT: .cfi_undefined 2721 +; GFX900-NEXT: .cfi_undefined 2722 +; GFX900-NEXT: .cfi_undefined 2723 +; GFX900-NEXT: .cfi_undefined 2724 +; GFX900-NEXT: .cfi_undefined 2725 +; GFX900-NEXT: .cfi_undefined 2726 +; GFX900-NEXT: .cfi_undefined 2727 +; GFX900-NEXT: .cfi_undefined 2736 +; GFX900-NEXT: .cfi_undefined 2737 +; GFX900-NEXT: .cfi_undefined 2738 +; GFX900-NEXT: .cfi_undefined 2739 +; GFX900-NEXT: .cfi_undefined 2740 +; GFX900-NEXT: .cfi_undefined 2741 +; GFX900-NEXT: .cfi_undefined 2742 +; GFX900-NEXT: .cfi_undefined 2743 +; GFX900-NEXT: .cfi_undefined 2752 +; GFX900-NEXT: .cfi_undefined 2753 +; GFX900-NEXT: .cfi_undefined 2754 +; GFX900-NEXT: .cfi_undefined 2755 +; GFX900-NEXT: .cfi_undefined 2756 +; GFX900-NEXT: .cfi_undefined 2757 +; GFX900-NEXT: .cfi_undefined 2758 +; GFX900-NEXT: .cfi_undefined 2759 +; GFX900-NEXT: .cfi_undefined 2768 +; GFX900-NEXT: .cfi_undefined 2769 +; GFX900-NEXT: 
.cfi_undefined 2770 +; GFX900-NEXT: .cfi_undefined 2771 +; GFX900-NEXT: .cfi_undefined 2772 +; GFX900-NEXT: .cfi_undefined 2773 +; GFX900-NEXT: .cfi_undefined 2774 +; GFX900-NEXT: .cfi_undefined 2775 +; GFX900-NEXT: .cfi_undefined 2784 +; GFX900-NEXT: .cfi_undefined 2785 +; GFX900-NEXT: .cfi_undefined 2786 +; GFX900-NEXT: .cfi_undefined 2787 +; GFX900-NEXT: .cfi_undefined 2788 +; GFX900-NEXT: .cfi_undefined 2789 +; GFX900-NEXT: .cfi_undefined 2790 +; GFX900-NEXT: .cfi_undefined 2791 +; GFX900-NEXT: .cfi_undefined 2800 +; GFX900-NEXT: .cfi_undefined 2801 +; GFX900-NEXT: .cfi_undefined 2802 +; GFX900-NEXT: .cfi_undefined 2803 +; GFX900-NEXT: .cfi_undefined 2804 +; GFX900-NEXT: .cfi_undefined 2805 +; GFX900-NEXT: .cfi_undefined 2806 +; GFX900-NEXT: .cfi_undefined 2807 +; GFX900-NEXT: .cfi_undefined 36 +; GFX900-NEXT: .cfi_undefined 37 +; GFX900-NEXT: .cfi_undefined 38 +; GFX900-NEXT: .cfi_undefined 39 +; GFX900-NEXT: .cfi_undefined 40 +; GFX900-NEXT: .cfi_undefined 41 +; GFX900-NEXT: .cfi_undefined 42 +; GFX900-NEXT: .cfi_undefined 43 +; GFX900-NEXT: .cfi_undefined 44 +; GFX900-NEXT: .cfi_undefined 45 +; GFX900-NEXT: .cfi_undefined 46 +; GFX900-NEXT: .cfi_undefined 47 +; GFX900-NEXT: .cfi_undefined 48 +; GFX900-NEXT: .cfi_undefined 49 +; GFX900-NEXT: .cfi_undefined 50 +; GFX900-NEXT: .cfi_undefined 51 +; GFX900-NEXT: .cfi_undefined 52 +; GFX900-NEXT: .cfi_undefined 53 +; GFX900-NEXT: .cfi_undefined 54 +; GFX900-NEXT: .cfi_undefined 55 +; GFX900-NEXT: .cfi_undefined 56 +; GFX900-NEXT: .cfi_undefined 57 +; GFX900-NEXT: .cfi_undefined 58 +; GFX900-NEXT: .cfi_undefined 59 +; GFX900-NEXT: .cfi_undefined 60 +; GFX900-NEXT: .cfi_undefined 61 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s40, s33 +; GFX900-NEXT: .cfi_register 65, 72 ; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: .cfi_def_cfa_register 65 ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_store_dword v41, off, 
s[0:3], s33 offset:440 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill @@ -268,6 +444,7 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload ; GFX900-NEXT: s_addk_i32 s32, 0x7100 ; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: .cfi_def_cfa_register 64 ; GFX900-NEXT: s_mov_b32 s33, s40 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] @@ -276,9 +453,183 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-DIS: .Lfunc_begin2: ; GFX90A-V2A-DIS-NEXT: .cfi_startproc ; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-DIS-NEXT: 
.cfi_undefined 2586 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2663 
+; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2756 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 36 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 37 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 38 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 39 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 40 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 41 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 42 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 43 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 44 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 45 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 46 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 47 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 48 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 49 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 50 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 51 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 52 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 53 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 54 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 55 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 56 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 57 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 58 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 59 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 60 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 61 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-DIS-NEXT: .cfi_register 65, 72 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill @@ -511,6 +862,7 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload ; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 64 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s40 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) ; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] @@ -519,9 +871,215 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-EN: .Lfunc_begin2: ; GFX90A-V2A-EN-NEXT: .cfi_startproc ; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2569 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 2640 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2726 
+; GFX90A-V2A-EN-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3076 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3081 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3085 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3088 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 36 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 37 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 38 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 39 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 40 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 41 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 42 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 43 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 44 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 45 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 46 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 47 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 48 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 49 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 50 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 51 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 52 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 53 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 54 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 55 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 56 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 57 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 58 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 59 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 60 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 61 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-EN-NEXT: .cfi_register 65, 72 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse @@ -754,6 +1312,7 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 64 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s40 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) ; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] @@ -762,9 +1321,183 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; WAVE32: .Lfunc_begin2: ; WAVE32-NEXT: .cfi_startproc ; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 
1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; 
WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 +; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 +; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 +; WAVE32-NEXT: .cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: 
.cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: .cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: .cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: .cfi_undefined 36 +; WAVE32-NEXT: .cfi_undefined 37 +; WAVE32-NEXT: .cfi_undefined 38 +; WAVE32-NEXT: .cfi_undefined 39 +; WAVE32-NEXT: .cfi_undefined 40 +; WAVE32-NEXT: .cfi_undefined 41 +; WAVE32-NEXT: .cfi_undefined 42 +; WAVE32-NEXT: .cfi_undefined 43 +; WAVE32-NEXT: .cfi_undefined 44 +; WAVE32-NEXT: .cfi_undefined 45 +; WAVE32-NEXT: .cfi_undefined 46 +; WAVE32-NEXT: .cfi_undefined 47 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: .cfi_undefined 50 +; WAVE32-NEXT: .cfi_undefined 51 +; WAVE32-NEXT: .cfi_undefined 52 +; WAVE32-NEXT: .cfi_undefined 53 +; WAVE32-NEXT: .cfi_undefined 54 +; WAVE32-NEXT: .cfi_undefined 55 +; WAVE32-NEXT: .cfi_undefined 56 +; WAVE32-NEXT: .cfi_undefined 57 +; WAVE32-NEXT: .cfi_undefined 58 +; WAVE32-NEXT: .cfi_undefined 59 +; WAVE32-NEXT: .cfi_undefined 60 +; WAVE32-NEXT: .cfi_undefined 61 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: s_mov_b32 s40, s33 +; WAVE32-NEXT: .cfi_register 65, 72 ; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: .cfi_def_cfa_register 65 ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill ; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill @@ -999,6 +1732,7 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; 
WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; WAVE32-NEXT: s_addk_i32 s32, 0x3880 ; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: .cfi_def_cfa_register 64 ; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 ; WAVE32-NEXT: s_mov_b32 s33, s40 ; WAVE32-NEXT: s_waitcnt vmcnt(0) @@ -1046,13 +1780,224 @@ define hidden void @func_call_clobber() #0 { ; GFX900: .Lfunc_begin3: ; GFX900-NEXT: .cfi_startproc ; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: .cfi_undefined 2560 +; GFX900-NEXT: .cfi_undefined 2561 +; GFX900-NEXT: .cfi_undefined 2562 +; GFX900-NEXT: .cfi_undefined 2563 +; GFX900-NEXT: .cfi_undefined 2564 +; GFX900-NEXT: .cfi_undefined 2565 +; GFX900-NEXT: .cfi_undefined 2566 +; GFX900-NEXT: .cfi_undefined 2567 +; GFX900-NEXT: .cfi_undefined 2568 +; GFX900-NEXT: .cfi_undefined 2569 +; GFX900-NEXT: .cfi_undefined 2570 +; GFX900-NEXT: .cfi_undefined 2571 +; GFX900-NEXT: .cfi_undefined 2572 +; GFX900-NEXT: .cfi_undefined 2573 +; GFX900-NEXT: .cfi_undefined 2574 +; GFX900-NEXT: .cfi_undefined 2575 +; GFX900-NEXT: .cfi_undefined 2576 +; GFX900-NEXT: .cfi_undefined 2577 +; GFX900-NEXT: .cfi_undefined 2578 +; GFX900-NEXT: .cfi_undefined 2579 +; GFX900-NEXT: .cfi_undefined 2580 +; GFX900-NEXT: .cfi_undefined 2581 +; GFX900-NEXT: .cfi_undefined 2582 +; GFX900-NEXT: .cfi_undefined 2583 +; GFX900-NEXT: .cfi_undefined 2584 +; GFX900-NEXT: .cfi_undefined 2585 +; GFX900-NEXT: .cfi_undefined 2586 +; GFX900-NEXT: .cfi_undefined 2587 +; GFX900-NEXT: .cfi_undefined 2588 +; GFX900-NEXT: .cfi_undefined 2589 +; GFX900-NEXT: .cfi_undefined 2590 +; GFX900-NEXT: .cfi_undefined 2591 +; GFX900-NEXT: .cfi_undefined 2592 +; GFX900-NEXT: .cfi_undefined 2593 +; GFX900-NEXT: .cfi_undefined 2594 +; GFX900-NEXT: .cfi_undefined 2595 +; GFX900-NEXT: .cfi_undefined 2596 +; GFX900-NEXT: .cfi_undefined 2597 +; GFX900-NEXT: .cfi_undefined 2598 +; GFX900-NEXT: .cfi_undefined 2599 +; 
GFX900-NEXT: .cfi_undefined 2608 +; GFX900-NEXT: .cfi_undefined 2609 +; GFX900-NEXT: .cfi_undefined 2610 +; GFX900-NEXT: .cfi_undefined 2611 +; GFX900-NEXT: .cfi_undefined 2612 +; GFX900-NEXT: .cfi_undefined 2613 +; GFX900-NEXT: .cfi_undefined 2614 +; GFX900-NEXT: .cfi_undefined 2615 +; GFX900-NEXT: .cfi_undefined 2624 +; GFX900-NEXT: .cfi_undefined 2625 +; GFX900-NEXT: .cfi_undefined 2626 +; GFX900-NEXT: .cfi_undefined 2627 +; GFX900-NEXT: .cfi_undefined 2628 +; GFX900-NEXT: .cfi_undefined 2629 +; GFX900-NEXT: .cfi_undefined 2630 +; GFX900-NEXT: .cfi_undefined 2631 +; GFX900-NEXT: .cfi_undefined 2640 +; GFX900-NEXT: .cfi_undefined 2641 +; GFX900-NEXT: .cfi_undefined 2642 +; GFX900-NEXT: .cfi_undefined 2643 +; GFX900-NEXT: .cfi_undefined 2644 +; GFX900-NEXT: .cfi_undefined 2645 +; GFX900-NEXT: .cfi_undefined 2646 +; GFX900-NEXT: .cfi_undefined 2647 +; GFX900-NEXT: .cfi_undefined 2656 +; GFX900-NEXT: .cfi_undefined 2657 +; GFX900-NEXT: .cfi_undefined 2658 +; GFX900-NEXT: .cfi_undefined 2659 +; GFX900-NEXT: .cfi_undefined 2660 +; GFX900-NEXT: .cfi_undefined 2661 +; GFX900-NEXT: .cfi_undefined 2662 +; GFX900-NEXT: .cfi_undefined 2663 +; GFX900-NEXT: .cfi_undefined 2672 +; GFX900-NEXT: .cfi_undefined 2673 +; GFX900-NEXT: .cfi_undefined 2674 +; GFX900-NEXT: .cfi_undefined 2675 +; GFX900-NEXT: .cfi_undefined 2676 +; GFX900-NEXT: .cfi_undefined 2677 +; GFX900-NEXT: .cfi_undefined 2678 +; GFX900-NEXT: .cfi_undefined 2679 +; GFX900-NEXT: .cfi_undefined 2688 +; GFX900-NEXT: .cfi_undefined 2689 +; GFX900-NEXT: .cfi_undefined 2690 +; GFX900-NEXT: .cfi_undefined 2691 +; GFX900-NEXT: .cfi_undefined 2692 +; GFX900-NEXT: .cfi_undefined 2693 +; GFX900-NEXT: .cfi_undefined 2694 +; GFX900-NEXT: .cfi_undefined 2695 +; GFX900-NEXT: .cfi_undefined 2704 +; GFX900-NEXT: .cfi_undefined 2705 +; GFX900-NEXT: .cfi_undefined 2706 +; GFX900-NEXT: .cfi_undefined 2707 +; GFX900-NEXT: .cfi_undefined 2708 +; GFX900-NEXT: .cfi_undefined 2709 +; GFX900-NEXT: .cfi_undefined 2710 +; GFX900-NEXT: 
.cfi_undefined 2711 +; GFX900-NEXT: .cfi_undefined 2720 +; GFX900-NEXT: .cfi_undefined 2721 +; GFX900-NEXT: .cfi_undefined 2722 +; GFX900-NEXT: .cfi_undefined 2723 +; GFX900-NEXT: .cfi_undefined 2724 +; GFX900-NEXT: .cfi_undefined 2725 +; GFX900-NEXT: .cfi_undefined 2726 +; GFX900-NEXT: .cfi_undefined 2727 +; GFX900-NEXT: .cfi_undefined 2736 +; GFX900-NEXT: .cfi_undefined 2737 +; GFX900-NEXT: .cfi_undefined 2738 +; GFX900-NEXT: .cfi_undefined 2739 +; GFX900-NEXT: .cfi_undefined 2740 +; GFX900-NEXT: .cfi_undefined 2741 +; GFX900-NEXT: .cfi_undefined 2742 +; GFX900-NEXT: .cfi_undefined 2743 +; GFX900-NEXT: .cfi_undefined 2752 +; GFX900-NEXT: .cfi_undefined 2753 +; GFX900-NEXT: .cfi_undefined 2754 +; GFX900-NEXT: .cfi_undefined 2755 +; GFX900-NEXT: .cfi_undefined 2756 +; GFX900-NEXT: .cfi_undefined 2757 +; GFX900-NEXT: .cfi_undefined 2758 +; GFX900-NEXT: .cfi_undefined 2759 +; GFX900-NEXT: .cfi_undefined 2768 +; GFX900-NEXT: .cfi_undefined 2769 +; GFX900-NEXT: .cfi_undefined 2770 +; GFX900-NEXT: .cfi_undefined 2771 +; GFX900-NEXT: .cfi_undefined 2772 +; GFX900-NEXT: .cfi_undefined 2773 +; GFX900-NEXT: .cfi_undefined 2774 +; GFX900-NEXT: .cfi_undefined 2775 +; GFX900-NEXT: .cfi_undefined 2784 +; GFX900-NEXT: .cfi_undefined 2785 +; GFX900-NEXT: .cfi_undefined 2786 +; GFX900-NEXT: .cfi_undefined 2787 +; GFX900-NEXT: .cfi_undefined 2788 +; GFX900-NEXT: .cfi_undefined 2789 +; GFX900-NEXT: .cfi_undefined 2790 +; GFX900-NEXT: .cfi_undefined 2791 +; GFX900-NEXT: .cfi_undefined 2800 +; GFX900-NEXT: .cfi_undefined 2801 +; GFX900-NEXT: .cfi_undefined 2802 +; GFX900-NEXT: .cfi_undefined 2803 +; GFX900-NEXT: .cfi_undefined 2804 +; GFX900-NEXT: .cfi_undefined 2805 +; GFX900-NEXT: .cfi_undefined 2806 +; GFX900-NEXT: .cfi_undefined 2807 +; GFX900-NEXT: .cfi_undefined 32 +; GFX900-NEXT: .cfi_undefined 33 +; GFX900-NEXT: .cfi_undefined 34 +; GFX900-NEXT: .cfi_undefined 35 +; GFX900-NEXT: .cfi_undefined 36 +; GFX900-NEXT: .cfi_undefined 37 +; GFX900-NEXT: .cfi_undefined 38 +; 
GFX900-NEXT: .cfi_undefined 39 +; GFX900-NEXT: .cfi_undefined 40 +; GFX900-NEXT: .cfi_undefined 41 +; GFX900-NEXT: .cfi_undefined 42 +; GFX900-NEXT: .cfi_undefined 43 +; GFX900-NEXT: .cfi_undefined 44 +; GFX900-NEXT: .cfi_undefined 45 +; GFX900-NEXT: .cfi_undefined 46 +; GFX900-NEXT: .cfi_undefined 47 +; GFX900-NEXT: .cfi_undefined 48 +; GFX900-NEXT: .cfi_undefined 49 +; GFX900-NEXT: .cfi_undefined 50 +; GFX900-NEXT: .cfi_undefined 51 +; GFX900-NEXT: .cfi_undefined 52 +; GFX900-NEXT: .cfi_undefined 53 +; GFX900-NEXT: .cfi_undefined 54 +; GFX900-NEXT: .cfi_undefined 55 +; GFX900-NEXT: .cfi_undefined 56 +; GFX900-NEXT: .cfi_undefined 57 +; GFX900-NEXT: .cfi_undefined 58 +; GFX900-NEXT: .cfi_undefined 59 +; GFX900-NEXT: .cfi_undefined 60 +; GFX900-NEXT: .cfi_undefined 61 +; GFX900-NEXT: .cfi_undefined 72 +; GFX900-NEXT: .cfi_undefined 73 +; GFX900-NEXT: .cfi_undefined 74 +; GFX900-NEXT: .cfi_undefined 75 +; GFX900-NEXT: .cfi_undefined 76 +; GFX900-NEXT: .cfi_undefined 77 +; GFX900-NEXT: .cfi_undefined 78 +; GFX900-NEXT: .cfi_undefined 79 +; GFX900-NEXT: .cfi_undefined 88 +; GFX900-NEXT: .cfi_undefined 89 +; GFX900-NEXT: .cfi_undefined 90 +; GFX900-NEXT: .cfi_undefined 91 +; GFX900-NEXT: .cfi_undefined 92 +; GFX900-NEXT: .cfi_undefined 93 +; GFX900-NEXT: .cfi_undefined 94 +; GFX900-NEXT: .cfi_undefined 95 +; GFX900-NEXT: .cfi_undefined 1096 +; GFX900-NEXT: .cfi_undefined 1097 +; GFX900-NEXT: .cfi_undefined 1098 +; GFX900-NEXT: .cfi_undefined 1099 +; GFX900-NEXT: .cfi_undefined 1100 +; GFX900-NEXT: .cfi_undefined 1101 +; GFX900-NEXT: .cfi_undefined 1102 +; GFX900-NEXT: .cfi_undefined 1103 +; GFX900-NEXT: .cfi_undefined 1112 +; GFX900-NEXT: .cfi_undefined 1113 +; GFX900-NEXT: .cfi_undefined 1114 +; GFX900-NEXT: .cfi_undefined 1115 +; GFX900-NEXT: .cfi_undefined 1116 +; GFX900-NEXT: .cfi_undefined 1117 +; GFX900-NEXT: .cfi_undefined 1118 +; GFX900-NEXT: .cfi_undefined 1119 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s16, s33 ; 
GFX900-NEXT: s_mov_b32 s33, s32 ; GFX900-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_offset 2600, 0 ; GFX900-NEXT: s_mov_b64 exec, s[18:19] ; GFX900-NEXT: v_writelane_b32 v40, s16, 2 +; GFX900-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX900-NEXT: .cfi_def_cfa_register 65 ; GFX900-NEXT: s_addk_i32 s32, 0x400 ; GFX900-NEXT: v_writelane_b32 v40, s30, 0 ; GFX900-NEXT: s_getpc_b64 s[16:17] @@ -1067,6 +2012,7 @@ define hidden void @func_call_clobber() #0 { ; GFX900-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: .cfi_def_cfa_register 64 ; GFX900-NEXT: s_mov_b32 s33, s4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] @@ -1075,13 +2021,256 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-DIS: .Lfunc_begin3: ; GFX90A-V2A-DIS-NEXT: .cfi_startproc ; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 
2578 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2647 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2740 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3076 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3081 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3085 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3088 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 33 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 34 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 35 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 36 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 37 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 38 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 39 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 40 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 41 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 42 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 43 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 44 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 45 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 46 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 47 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 48 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 49 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 50 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 51 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 52 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 53 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 54 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 55 +; GFX90A-V2A-DIS-NEXT: 
.cfi_undefined 56 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 57 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 58 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 59 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 60 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 61 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 72 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 73 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 74 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 75 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 76 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 77 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 78 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 79 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 88 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 89 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 90 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 91 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 92 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 93 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 94 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 95 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1096 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1097 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1098 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1099 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1100 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1101 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1102 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1103 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1112 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1113 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1114 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1115 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1116 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1117 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1118 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1119 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s16, s33 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 ; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_offset 2600, 0 ; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[18:19] ; 
GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 ; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-V2A-DIS-NEXT: s_getpc_b64 s[16:17] @@ -1096,6 +2285,7 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 64 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s4 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) ; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] @@ -1104,13 +2294,256 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-EN: .Lfunc_begin3: ; GFX90A-V2A-EN-NEXT: .cfi_startproc ; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2581 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 2660 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2754 
+; GFX90A-V2A-EN-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3076 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3081 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3085 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3088 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 33 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 34 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 35 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 36 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 37 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 38 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 39 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 40 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 41 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 42 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 43 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 44 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 45 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 46 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 47 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 48 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 49 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 50 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 51 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 52 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 53 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 54 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 55 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 56 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 57 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 58 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 59 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 60 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 61 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 72 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 73 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 74 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 75 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 76 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 77 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 78 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 79 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 88 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 89 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 90 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 91 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 92 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 93 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 94 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 95 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1098 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1103 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1112 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1113 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1114 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1115 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1116 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1117 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1118 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1119 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: s_mov_b32 s16, s33 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 ; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_offset 2600, 0 ; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[18:19] ; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 ; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-V2A-EN-NEXT: s_getpc_b64 s[16:17] @@ -1125,6 +2558,7 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 
s[6:7], -1 ; GFX90A-V2A-EN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 64 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s4 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) ; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] @@ -1133,19 +2567,230 @@ define hidden void @func_call_clobber() #0 { ; WAVE32: .Lfunc_begin3: ; WAVE32-NEXT: .cfi_startproc ; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: 
.cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 +; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 
+; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 +; WAVE32-NEXT: .cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: .cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: .cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: .cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: .cfi_undefined 32 +; WAVE32-NEXT: .cfi_undefined 33 +; WAVE32-NEXT: .cfi_undefined 34 +; WAVE32-NEXT: .cfi_undefined 35 +; WAVE32-NEXT: .cfi_undefined 36 +; WAVE32-NEXT: .cfi_undefined 37 +; WAVE32-NEXT: .cfi_undefined 
38 +; WAVE32-NEXT: .cfi_undefined 39 +; WAVE32-NEXT: .cfi_undefined 40 +; WAVE32-NEXT: .cfi_undefined 41 +; WAVE32-NEXT: .cfi_undefined 42 +; WAVE32-NEXT: .cfi_undefined 43 +; WAVE32-NEXT: .cfi_undefined 44 +; WAVE32-NEXT: .cfi_undefined 45 +; WAVE32-NEXT: .cfi_undefined 46 +; WAVE32-NEXT: .cfi_undefined 47 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: .cfi_undefined 50 +; WAVE32-NEXT: .cfi_undefined 51 +; WAVE32-NEXT: .cfi_undefined 52 +; WAVE32-NEXT: .cfi_undefined 53 +; WAVE32-NEXT: .cfi_undefined 54 +; WAVE32-NEXT: .cfi_undefined 55 +; WAVE32-NEXT: .cfi_undefined 56 +; WAVE32-NEXT: .cfi_undefined 57 +; WAVE32-NEXT: .cfi_undefined 58 +; WAVE32-NEXT: .cfi_undefined 59 +; WAVE32-NEXT: .cfi_undefined 60 +; WAVE32-NEXT: .cfi_undefined 61 +; WAVE32-NEXT: .cfi_undefined 72 +; WAVE32-NEXT: .cfi_undefined 73 +; WAVE32-NEXT: .cfi_undefined 74 +; WAVE32-NEXT: .cfi_undefined 75 +; WAVE32-NEXT: .cfi_undefined 76 +; WAVE32-NEXT: .cfi_undefined 77 +; WAVE32-NEXT: .cfi_undefined 78 +; WAVE32-NEXT: .cfi_undefined 79 +; WAVE32-NEXT: .cfi_undefined 88 +; WAVE32-NEXT: .cfi_undefined 89 +; WAVE32-NEXT: .cfi_undefined 90 +; WAVE32-NEXT: .cfi_undefined 91 +; WAVE32-NEXT: .cfi_undefined 92 +; WAVE32-NEXT: .cfi_undefined 93 +; WAVE32-NEXT: .cfi_undefined 94 +; WAVE32-NEXT: .cfi_undefined 95 +; WAVE32-NEXT: .cfi_undefined 1096 +; WAVE32-NEXT: .cfi_undefined 1097 +; WAVE32-NEXT: .cfi_undefined 1098 +; WAVE32-NEXT: .cfi_undefined 1099 +; WAVE32-NEXT: .cfi_undefined 1100 +; WAVE32-NEXT: .cfi_undefined 1101 +; WAVE32-NEXT: .cfi_undefined 1102 +; WAVE32-NEXT: .cfi_undefined 1103 +; WAVE32-NEXT: .cfi_undefined 1112 +; WAVE32-NEXT: .cfi_undefined 1113 +; WAVE32-NEXT: .cfi_undefined 1114 +; WAVE32-NEXT: .cfi_undefined 1115 +; WAVE32-NEXT: .cfi_undefined 1116 +; WAVE32-NEXT: .cfi_undefined 1117 +; WAVE32-NEXT: .cfi_undefined 1118 +; WAVE32-NEXT: .cfi_undefined 1119 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: s_mov_b32 s16, s33 ; 
WAVE32-NEXT: s_mov_b32 s33, s32 ; WAVE32-NEXT: s_or_saveexec_b32 s17, -1 ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1576, 0 ; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 ; WAVE32-NEXT: s_mov_b32 exec_lo, s17 ; WAVE32-NEXT: v_writelane_b32 v40, s16, 2 +; WAVE32-NEXT: .cfi_llvm_vector_registers 65, 1576, 2, 32 +; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 ; WAVE32-NEXT: s_addk_i32 s32, 0x200 ; WAVE32-NEXT: s_getpc_b64 s[16:17] ; WAVE32-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; WAVE32-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 ; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 ; WAVE32-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 @@ -1156,6 +2801,7 @@ define hidden void @func_call_clobber() #0 { ; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 ; WAVE32-NEXT: s_mov_b32 exec_lo, s5 +; WAVE32-NEXT: .cfi_def_cfa_register 64 ; WAVE32-NEXT: s_mov_b32 s33, s4 ; WAVE32-NEXT: s_waitcnt vmcnt(0) ; WAVE32-NEXT: s_setpc_b64 s[30:31] @@ -1169,6 +2815,8 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX900: .Lfunc_begin4: ; GFX900-NEXT: .cfi_startproc ; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill @@ -1193,6 +2841,8 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX90A-V2A-DIS: .Lfunc_begin4: ; GFX90A-V2A-DIS-NEXT: .cfi_startproc ; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX90A-V2A-DIS-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill @@ -1221,6 +2871,12 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX90A-V2A-EN: .Lfunc_begin4: ; GFX90A-V2A-EN-NEXT: .cfi_startproc ; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse @@ -1248,6 +2904,8 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; WAVE32: .Lfunc_begin4: ; WAVE32-NEXT: .cfi_startproc ; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill @@ -1281,6 +2939,8 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX900: .Lfunc_begin5: ; GFX900-NEXT: .cfi_startproc ; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill @@ -1305,6 +2965,8 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX90A-V2A-DIS: .Lfunc_begin5: ; GFX90A-V2A-DIS-NEXT: 
.cfi_startproc ; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill @@ -1333,6 +2995,12 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX90A-V2A-EN: .Lfunc_begin5: ; GFX90A-V2A-EN-NEXT: .cfi_startproc ; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse @@ -1360,6 +3028,8 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; WAVE32: .Lfunc_begin5: ; WAVE32-NEXT: .cfi_startproc ; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index 40cdfd76d6af6..a0c25b2a0beb3 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -15,13 +15,480 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: 
.cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 2566 +; CHECK-NEXT: .cfi_undefined 2567 +; CHECK-NEXT: .cfi_undefined 2568 +; CHECK-NEXT: .cfi_undefined 2569 +; CHECK-NEXT: .cfi_undefined 2570 +; CHECK-NEXT: .cfi_undefined 2571 +; CHECK-NEXT: .cfi_undefined 2572 +; CHECK-NEXT: .cfi_undefined 2573 +; CHECK-NEXT: .cfi_undefined 2574 +; CHECK-NEXT: .cfi_undefined 2575 +; CHECK-NEXT: .cfi_undefined 2576 +; CHECK-NEXT: .cfi_undefined 2577 +; CHECK-NEXT: .cfi_undefined 2578 +; CHECK-NEXT: .cfi_undefined 2579 +; CHECK-NEXT: .cfi_undefined 2580 +; CHECK-NEXT: .cfi_undefined 2581 +; CHECK-NEXT: .cfi_undefined 2582 +; CHECK-NEXT: .cfi_undefined 2583 +; CHECK-NEXT: .cfi_undefined 2584 +; CHECK-NEXT: .cfi_undefined 2585 +; CHECK-NEXT: .cfi_undefined 2586 +; CHECK-NEXT: .cfi_undefined 2587 +; CHECK-NEXT: .cfi_undefined 2588 +; CHECK-NEXT: .cfi_undefined 2589 +; CHECK-NEXT: .cfi_undefined 2590 +; CHECK-NEXT: .cfi_undefined 2591 +; CHECK-NEXT: .cfi_undefined 2592 +; CHECK-NEXT: .cfi_undefined 2593 +; CHECK-NEXT: .cfi_undefined 2594 +; CHECK-NEXT: .cfi_undefined 2595 +; CHECK-NEXT: .cfi_undefined 2596 +; CHECK-NEXT: .cfi_undefined 2597 +; CHECK-NEXT: .cfi_undefined 2598 +; CHECK-NEXT: .cfi_undefined 2599 +; CHECK-NEXT: .cfi_undefined 2608 +; CHECK-NEXT: .cfi_undefined 2609 +; CHECK-NEXT: .cfi_undefined 2610 +; CHECK-NEXT: .cfi_undefined 2611 +; CHECK-NEXT: .cfi_undefined 2612 +; CHECK-NEXT: .cfi_undefined 2613 +; CHECK-NEXT: .cfi_undefined 2614 +; CHECK-NEXT: .cfi_undefined 2615 +; CHECK-NEXT: .cfi_undefined 2624 +; CHECK-NEXT: .cfi_undefined 2625 +; CHECK-NEXT: .cfi_undefined 2626 +; CHECK-NEXT: .cfi_undefined 2627 +; CHECK-NEXT: .cfi_undefined 2628 
+; CHECK-NEXT: .cfi_undefined 2629 +; CHECK-NEXT: .cfi_undefined 2630 +; CHECK-NEXT: .cfi_undefined 2631 +; CHECK-NEXT: .cfi_undefined 2640 +; CHECK-NEXT: .cfi_undefined 2641 +; CHECK-NEXT: .cfi_undefined 2642 +; CHECK-NEXT: .cfi_undefined 2643 +; CHECK-NEXT: .cfi_undefined 2644 +; CHECK-NEXT: .cfi_undefined 2645 +; CHECK-NEXT: .cfi_undefined 2646 +; CHECK-NEXT: .cfi_undefined 2647 +; CHECK-NEXT: .cfi_undefined 2656 +; CHECK-NEXT: .cfi_undefined 2657 +; CHECK-NEXT: .cfi_undefined 2658 +; CHECK-NEXT: .cfi_undefined 2659 +; CHECK-NEXT: .cfi_undefined 2660 +; CHECK-NEXT: .cfi_undefined 2661 +; CHECK-NEXT: .cfi_undefined 2662 +; CHECK-NEXT: .cfi_undefined 2663 +; CHECK-NEXT: .cfi_undefined 2672 +; CHECK-NEXT: .cfi_undefined 2673 +; CHECK-NEXT: .cfi_undefined 2674 +; CHECK-NEXT: .cfi_undefined 2675 +; CHECK-NEXT: .cfi_undefined 2676 +; CHECK-NEXT: .cfi_undefined 2677 +; CHECK-NEXT: .cfi_undefined 2678 +; CHECK-NEXT: .cfi_undefined 2679 +; CHECK-NEXT: .cfi_undefined 2688 +; CHECK-NEXT: .cfi_undefined 2689 +; CHECK-NEXT: .cfi_undefined 2690 +; CHECK-NEXT: .cfi_undefined 2691 +; CHECK-NEXT: .cfi_undefined 2692 +; CHECK-NEXT: .cfi_undefined 2693 +; CHECK-NEXT: .cfi_undefined 2694 +; CHECK-NEXT: .cfi_undefined 2695 +; CHECK-NEXT: .cfi_undefined 2704 +; CHECK-NEXT: .cfi_undefined 2705 +; CHECK-NEXT: .cfi_undefined 2706 +; CHECK-NEXT: .cfi_undefined 2707 +; CHECK-NEXT: .cfi_undefined 2708 +; CHECK-NEXT: .cfi_undefined 2709 +; CHECK-NEXT: .cfi_undefined 2710 +; CHECK-NEXT: .cfi_undefined 2711 +; CHECK-NEXT: .cfi_undefined 2720 +; CHECK-NEXT: .cfi_undefined 2721 +; CHECK-NEXT: .cfi_undefined 2722 +; CHECK-NEXT: .cfi_undefined 2723 +; CHECK-NEXT: .cfi_undefined 2724 +; CHECK-NEXT: .cfi_undefined 2725 +; CHECK-NEXT: .cfi_undefined 2726 +; CHECK-NEXT: .cfi_undefined 2727 +; CHECK-NEXT: .cfi_undefined 2736 +; CHECK-NEXT: .cfi_undefined 2737 +; CHECK-NEXT: .cfi_undefined 2738 +; CHECK-NEXT: .cfi_undefined 2739 +; CHECK-NEXT: .cfi_undefined 2740 +; CHECK-NEXT: .cfi_undefined 2741 +; 
CHECK-NEXT: .cfi_undefined 2742 +; CHECK-NEXT: .cfi_undefined 2743 +; CHECK-NEXT: .cfi_undefined 2752 +; CHECK-NEXT: .cfi_undefined 2753 +; CHECK-NEXT: .cfi_undefined 2754 +; CHECK-NEXT: .cfi_undefined 2755 +; CHECK-NEXT: .cfi_undefined 2756 +; CHECK-NEXT: .cfi_undefined 2757 +; CHECK-NEXT: .cfi_undefined 2758 +; CHECK-NEXT: .cfi_undefined 2759 +; CHECK-NEXT: .cfi_undefined 2768 +; CHECK-NEXT: .cfi_undefined 2769 +; CHECK-NEXT: .cfi_undefined 2770 +; CHECK-NEXT: .cfi_undefined 2771 +; CHECK-NEXT: .cfi_undefined 2772 +; CHECK-NEXT: .cfi_undefined 2773 +; CHECK-NEXT: .cfi_undefined 2774 +; CHECK-NEXT: .cfi_undefined 2775 +; CHECK-NEXT: .cfi_undefined 2784 +; CHECK-NEXT: .cfi_undefined 2785 +; CHECK-NEXT: .cfi_undefined 2786 +; CHECK-NEXT: .cfi_undefined 2787 +; CHECK-NEXT: .cfi_undefined 2788 +; CHECK-NEXT: .cfi_undefined 2789 +; CHECK-NEXT: .cfi_undefined 2790 +; CHECK-NEXT: .cfi_undefined 2791 +; CHECK-NEXT: .cfi_undefined 2800 +; CHECK-NEXT: .cfi_undefined 2801 +; CHECK-NEXT: .cfi_undefined 2802 +; CHECK-NEXT: .cfi_undefined 2803 +; CHECK-NEXT: .cfi_undefined 2804 +; CHECK-NEXT: .cfi_undefined 2805 +; CHECK-NEXT: .cfi_undefined 2806 +; CHECK-NEXT: .cfi_undefined 2807 +; CHECK-NEXT: .cfi_undefined 3072 +; CHECK-NEXT: .cfi_undefined 3073 +; CHECK-NEXT: .cfi_undefined 3074 +; CHECK-NEXT: .cfi_undefined 3075 +; CHECK-NEXT: .cfi_undefined 3076 +; CHECK-NEXT: .cfi_undefined 3077 +; CHECK-NEXT: .cfi_undefined 3078 +; CHECK-NEXT: .cfi_undefined 3079 +; CHECK-NEXT: .cfi_undefined 3080 +; CHECK-NEXT: .cfi_undefined 3081 +; CHECK-NEXT: .cfi_undefined 3082 +; CHECK-NEXT: .cfi_undefined 3083 +; CHECK-NEXT: .cfi_undefined 3084 +; CHECK-NEXT: .cfi_undefined 3085 +; CHECK-NEXT: .cfi_undefined 3086 +; CHECK-NEXT: .cfi_undefined 3087 +; CHECK-NEXT: .cfi_undefined 3088 +; CHECK-NEXT: .cfi_undefined 3089 +; CHECK-NEXT: .cfi_undefined 3090 +; CHECK-NEXT: .cfi_undefined 3091 +; CHECK-NEXT: .cfi_undefined 3092 +; CHECK-NEXT: .cfi_undefined 3093 +; CHECK-NEXT: .cfi_undefined 3094 +; 
CHECK-NEXT: .cfi_undefined 3095 +; CHECK-NEXT: .cfi_undefined 3096 +; CHECK-NEXT: .cfi_undefined 3097 +; CHECK-NEXT: .cfi_undefined 3098 +; CHECK-NEXT: .cfi_undefined 3099 +; CHECK-NEXT: .cfi_undefined 3100 +; CHECK-NEXT: .cfi_undefined 3101 +; CHECK-NEXT: .cfi_undefined 3102 +; CHECK-NEXT: .cfi_undefined 3103 +; CHECK-NEXT: .cfi_undefined 3104 +; CHECK-NEXT: .cfi_undefined 3105 +; CHECK-NEXT: .cfi_undefined 3106 +; CHECK-NEXT: .cfi_undefined 3107 +; CHECK-NEXT: .cfi_undefined 3108 +; CHECK-NEXT: .cfi_undefined 3109 +; CHECK-NEXT: .cfi_undefined 3110 +; CHECK-NEXT: .cfi_undefined 3111 +; CHECK-NEXT: .cfi_undefined 3112 +; CHECK-NEXT: .cfi_undefined 3113 +; CHECK-NEXT: .cfi_undefined 3114 +; CHECK-NEXT: .cfi_undefined 3115 +; CHECK-NEXT: .cfi_undefined 3116 +; CHECK-NEXT: .cfi_undefined 3117 +; CHECK-NEXT: .cfi_undefined 3118 +; CHECK-NEXT: .cfi_undefined 3119 +; CHECK-NEXT: .cfi_undefined 3120 +; CHECK-NEXT: .cfi_undefined 3121 +; CHECK-NEXT: .cfi_undefined 3122 +; CHECK-NEXT: .cfi_undefined 3123 +; CHECK-NEXT: .cfi_undefined 3124 +; CHECK-NEXT: .cfi_undefined 3125 +; CHECK-NEXT: .cfi_undefined 3126 +; CHECK-NEXT: .cfi_undefined 3127 +; CHECK-NEXT: .cfi_undefined 3128 +; CHECK-NEXT: .cfi_undefined 3129 +; CHECK-NEXT: .cfi_undefined 3130 +; CHECK-NEXT: .cfi_undefined 3131 +; CHECK-NEXT: .cfi_undefined 3132 +; CHECK-NEXT: .cfi_undefined 3133 +; CHECK-NEXT: .cfi_undefined 3134 +; CHECK-NEXT: .cfi_undefined 3135 +; CHECK-NEXT: .cfi_undefined 3136 +; CHECK-NEXT: .cfi_undefined 3137 +; CHECK-NEXT: .cfi_undefined 3138 +; CHECK-NEXT: .cfi_undefined 3139 +; CHECK-NEXT: .cfi_undefined 3140 +; CHECK-NEXT: .cfi_undefined 3141 +; CHECK-NEXT: .cfi_undefined 3142 +; CHECK-NEXT: .cfi_undefined 3143 +; CHECK-NEXT: .cfi_undefined 3144 +; CHECK-NEXT: .cfi_undefined 3145 +; CHECK-NEXT: .cfi_undefined 3146 +; CHECK-NEXT: .cfi_undefined 3147 +; CHECK-NEXT: .cfi_undefined 3148 +; CHECK-NEXT: .cfi_undefined 3149 +; CHECK-NEXT: .cfi_undefined 3150 +; CHECK-NEXT: .cfi_undefined 3151 +; 
CHECK-NEXT: .cfi_undefined 3152 +; CHECK-NEXT: .cfi_undefined 3153 +; CHECK-NEXT: .cfi_undefined 3154 +; CHECK-NEXT: .cfi_undefined 3155 +; CHECK-NEXT: .cfi_undefined 3156 +; CHECK-NEXT: .cfi_undefined 3157 +; CHECK-NEXT: .cfi_undefined 3158 +; CHECK-NEXT: .cfi_undefined 3159 +; CHECK-NEXT: .cfi_undefined 3160 +; CHECK-NEXT: .cfi_undefined 3161 +; CHECK-NEXT: .cfi_undefined 3162 +; CHECK-NEXT: .cfi_undefined 3163 +; CHECK-NEXT: .cfi_undefined 3164 +; CHECK-NEXT: .cfi_undefined 3165 +; CHECK-NEXT: .cfi_undefined 3166 +; CHECK-NEXT: .cfi_undefined 3167 +; CHECK-NEXT: .cfi_undefined 3168 +; CHECK-NEXT: .cfi_undefined 3169 +; CHECK-NEXT: .cfi_undefined 3170 +; CHECK-NEXT: .cfi_undefined 3171 +; CHECK-NEXT: .cfi_undefined 3172 +; CHECK-NEXT: .cfi_undefined 3173 +; CHECK-NEXT: .cfi_undefined 3174 +; CHECK-NEXT: .cfi_undefined 3175 +; CHECK-NEXT: .cfi_undefined 3176 +; CHECK-NEXT: .cfi_undefined 3177 +; CHECK-NEXT: .cfi_undefined 3178 +; CHECK-NEXT: .cfi_undefined 3179 +; CHECK-NEXT: .cfi_undefined 3180 +; CHECK-NEXT: .cfi_undefined 3181 +; CHECK-NEXT: .cfi_undefined 3182 +; CHECK-NEXT: .cfi_undefined 3183 +; CHECK-NEXT: .cfi_undefined 3184 +; CHECK-NEXT: .cfi_undefined 3185 +; CHECK-NEXT: .cfi_undefined 3186 +; CHECK-NEXT: .cfi_undefined 3187 +; CHECK-NEXT: .cfi_undefined 3188 +; CHECK-NEXT: .cfi_undefined 3189 +; CHECK-NEXT: .cfi_undefined 3190 +; CHECK-NEXT: .cfi_undefined 3191 +; CHECK-NEXT: .cfi_undefined 3192 +; CHECK-NEXT: .cfi_undefined 3193 +; CHECK-NEXT: .cfi_undefined 3194 +; CHECK-NEXT: .cfi_undefined 3195 +; CHECK-NEXT: .cfi_undefined 3196 +; CHECK-NEXT: .cfi_undefined 3197 +; CHECK-NEXT: .cfi_undefined 3198 +; CHECK-NEXT: .cfi_undefined 3199 +; CHECK-NEXT: .cfi_undefined 3200 +; CHECK-NEXT: .cfi_undefined 3201 +; CHECK-NEXT: .cfi_undefined 3202 +; CHECK-NEXT: .cfi_undefined 3203 +; CHECK-NEXT: .cfi_undefined 3204 +; CHECK-NEXT: .cfi_undefined 3205 +; CHECK-NEXT: .cfi_undefined 3206 +; CHECK-NEXT: .cfi_undefined 3207 +; CHECK-NEXT: .cfi_undefined 3208 +; 
CHECK-NEXT: .cfi_undefined 3209 +; CHECK-NEXT: .cfi_undefined 3210 +; CHECK-NEXT: .cfi_undefined 3211 +; CHECK-NEXT: .cfi_undefined 3212 +; CHECK-NEXT: .cfi_undefined 3213 +; CHECK-NEXT: .cfi_undefined 3214 +; CHECK-NEXT: .cfi_undefined 3215 +; CHECK-NEXT: .cfi_undefined 3216 +; CHECK-NEXT: .cfi_undefined 3217 +; CHECK-NEXT: .cfi_undefined 3218 +; CHECK-NEXT: .cfi_undefined 3219 +; CHECK-NEXT: .cfi_undefined 3220 +; CHECK-NEXT: .cfi_undefined 3221 +; CHECK-NEXT: .cfi_undefined 3222 +; CHECK-NEXT: .cfi_undefined 3223 +; CHECK-NEXT: .cfi_undefined 3224 +; CHECK-NEXT: .cfi_undefined 3225 +; CHECK-NEXT: .cfi_undefined 3226 +; CHECK-NEXT: .cfi_undefined 3227 +; CHECK-NEXT: .cfi_undefined 3228 +; CHECK-NEXT: .cfi_undefined 3229 +; CHECK-NEXT: .cfi_undefined 3230 +; CHECK-NEXT: .cfi_undefined 3231 +; CHECK-NEXT: .cfi_undefined 3232 +; CHECK-NEXT: .cfi_undefined 3233 +; CHECK-NEXT: .cfi_undefined 3234 +; CHECK-NEXT: .cfi_undefined 3235 +; CHECK-NEXT: .cfi_undefined 3236 +; CHECK-NEXT: .cfi_undefined 3237 +; CHECK-NEXT: .cfi_undefined 3238 +; CHECK-NEXT: .cfi_undefined 3239 +; CHECK-NEXT: .cfi_undefined 3240 +; CHECK-NEXT: .cfi_undefined 3241 +; CHECK-NEXT: .cfi_undefined 3242 +; CHECK-NEXT: .cfi_undefined 3243 +; CHECK-NEXT: .cfi_undefined 3244 +; CHECK-NEXT: .cfi_undefined 3245 +; CHECK-NEXT: .cfi_undefined 3246 +; CHECK-NEXT: .cfi_undefined 3247 +; CHECK-NEXT: .cfi_undefined 3248 +; CHECK-NEXT: .cfi_undefined 3249 +; CHECK-NEXT: .cfi_undefined 3250 +; CHECK-NEXT: .cfi_undefined 3251 +; CHECK-NEXT: .cfi_undefined 3252 +; CHECK-NEXT: .cfi_undefined 3253 +; CHECK-NEXT: .cfi_undefined 3254 +; CHECK-NEXT: .cfi_undefined 3255 +; CHECK-NEXT: .cfi_undefined 3256 +; CHECK-NEXT: .cfi_undefined 3257 +; CHECK-NEXT: .cfi_undefined 3258 +; CHECK-NEXT: .cfi_undefined 3259 +; CHECK-NEXT: .cfi_undefined 3260 +; CHECK-NEXT: .cfi_undefined 3261 +; CHECK-NEXT: .cfi_undefined 3262 +; CHECK-NEXT: .cfi_undefined 3263 +; CHECK-NEXT: .cfi_undefined 3264 +; CHECK-NEXT: .cfi_undefined 3265 +; 
CHECK-NEXT: .cfi_undefined 3266 +; CHECK-NEXT: .cfi_undefined 3267 +; CHECK-NEXT: .cfi_undefined 3268 +; CHECK-NEXT: .cfi_undefined 3269 +; CHECK-NEXT: .cfi_undefined 3270 +; CHECK-NEXT: .cfi_undefined 3271 +; CHECK-NEXT: .cfi_undefined 3272 +; CHECK-NEXT: .cfi_undefined 3273 +; CHECK-NEXT: .cfi_undefined 3274 +; CHECK-NEXT: .cfi_undefined 3275 +; CHECK-NEXT: .cfi_undefined 3276 +; CHECK-NEXT: .cfi_undefined 3277 +; CHECK-NEXT: .cfi_undefined 3278 +; CHECK-NEXT: .cfi_undefined 3279 +; CHECK-NEXT: .cfi_undefined 3280 +; CHECK-NEXT: .cfi_undefined 3281 +; CHECK-NEXT: .cfi_undefined 3282 +; CHECK-NEXT: .cfi_undefined 3283 +; CHECK-NEXT: .cfi_undefined 3284 +; CHECK-NEXT: .cfi_undefined 3285 +; CHECK-NEXT: .cfi_undefined 3286 +; CHECK-NEXT: .cfi_undefined 3287 +; CHECK-NEXT: .cfi_undefined 3288 +; CHECK-NEXT: .cfi_undefined 3289 +; CHECK-NEXT: .cfi_undefined 3290 +; CHECK-NEXT: .cfi_undefined 3291 +; CHECK-NEXT: .cfi_undefined 3292 +; CHECK-NEXT: .cfi_undefined 3293 +; CHECK-NEXT: .cfi_undefined 3294 +; CHECK-NEXT: .cfi_undefined 3295 +; CHECK-NEXT: .cfi_undefined 3296 +; CHECK-NEXT: .cfi_undefined 3297 +; CHECK-NEXT: .cfi_undefined 3298 +; CHECK-NEXT: .cfi_undefined 3299 +; CHECK-NEXT: .cfi_undefined 3300 +; CHECK-NEXT: .cfi_undefined 3301 +; CHECK-NEXT: .cfi_undefined 3302 +; CHECK-NEXT: .cfi_undefined 3303 +; CHECK-NEXT: .cfi_undefined 3304 +; CHECK-NEXT: .cfi_undefined 3305 +; CHECK-NEXT: .cfi_undefined 3306 +; CHECK-NEXT: .cfi_undefined 3307 +; CHECK-NEXT: .cfi_undefined 3308 +; CHECK-NEXT: .cfi_undefined 3309 +; CHECK-NEXT: .cfi_undefined 3310 +; CHECK-NEXT: .cfi_undefined 3311 +; CHECK-NEXT: .cfi_undefined 3312 +; CHECK-NEXT: .cfi_undefined 3313 +; CHECK-NEXT: .cfi_undefined 3314 +; CHECK-NEXT: .cfi_undefined 3315 +; CHECK-NEXT: .cfi_undefined 3316 +; CHECK-NEXT: .cfi_undefined 3317 +; CHECK-NEXT: .cfi_undefined 3318 +; CHECK-NEXT: .cfi_undefined 3319 +; CHECK-NEXT: .cfi_undefined 3320 +; CHECK-NEXT: .cfi_undefined 3321 +; CHECK-NEXT: .cfi_undefined 3322 +; 
CHECK-NEXT: .cfi_undefined 3323 +; CHECK-NEXT: .cfi_undefined 3324 +; CHECK-NEXT: .cfi_undefined 3325 +; CHECK-NEXT: .cfi_undefined 3326 +; CHECK-NEXT: .cfi_undefined 3327 +; CHECK-NEXT: .cfi_undefined 32 +; CHECK-NEXT: .cfi_undefined 33 +; CHECK-NEXT: .cfi_undefined 34 +; CHECK-NEXT: .cfi_undefined 35 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 +; CHECK-NEXT: .cfi_undefined 40 +; CHECK-NEXT: .cfi_undefined 41 +; CHECK-NEXT: .cfi_undefined 42 +; CHECK-NEXT: .cfi_undefined 43 +; CHECK-NEXT: .cfi_undefined 44 +; CHECK-NEXT: .cfi_undefined 45 +; CHECK-NEXT: .cfi_undefined 46 +; CHECK-NEXT: .cfi_undefined 47 +; CHECK-NEXT: .cfi_undefined 48 +; CHECK-NEXT: .cfi_undefined 49 +; CHECK-NEXT: .cfi_undefined 50 +; CHECK-NEXT: .cfi_undefined 51 +; CHECK-NEXT: .cfi_undefined 52 +; CHECK-NEXT: .cfi_undefined 53 +; CHECK-NEXT: .cfi_undefined 54 +; CHECK-NEXT: .cfi_undefined 55 +; CHECK-NEXT: .cfi_undefined 56 +; CHECK-NEXT: .cfi_undefined 57 +; CHECK-NEXT: .cfi_undefined 58 +; CHECK-NEXT: .cfi_undefined 59 +; CHECK-NEXT: .cfi_undefined 60 +; CHECK-NEXT: .cfi_undefined 61 +; CHECK-NEXT: .cfi_undefined 72 +; CHECK-NEXT: .cfi_undefined 73 +; CHECK-NEXT: .cfi_undefined 74 +; CHECK-NEXT: .cfi_undefined 75 +; CHECK-NEXT: .cfi_undefined 76 +; CHECK-NEXT: .cfi_undefined 77 +; CHECK-NEXT: .cfi_undefined 78 +; CHECK-NEXT: .cfi_undefined 79 +; CHECK-NEXT: .cfi_undefined 88 +; CHECK-NEXT: .cfi_undefined 89 +; CHECK-NEXT: .cfi_undefined 90 +; CHECK-NEXT: .cfi_undefined 91 +; CHECK-NEXT: .cfi_undefined 92 +; CHECK-NEXT: .cfi_undefined 93 +; CHECK-NEXT: .cfi_undefined 94 +; CHECK-NEXT: .cfi_undefined 95 +; CHECK-NEXT: .cfi_undefined 1096 +; CHECK-NEXT: .cfi_undefined 1097 +; CHECK-NEXT: .cfi_undefined 1098 +; CHECK-NEXT: .cfi_undefined 1099 +; CHECK-NEXT: .cfi_undefined 1100 +; CHECK-NEXT: .cfi_undefined 1101 +; CHECK-NEXT: .cfi_undefined 1102 +; CHECK-NEXT: .cfi_undefined 1103 +; CHECK-NEXT: .cfi_undefined 
1112 +; CHECK-NEXT: .cfi_undefined 1113 +; CHECK-NEXT: .cfi_undefined 1114 +; CHECK-NEXT: .cfi_undefined 1115 +; CHECK-NEXT: .cfi_undefined 1116 +; CHECK-NEXT: .cfi_undefined 1117 +; CHECK-NEXT: .cfi_undefined 1118 +; CHECK-NEXT: .cfi_undefined 1119 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2601, 256 ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v41, s16, 16 +; CHECK-NEXT: .cfi_llvm_vector_registers 65, 2601, 16, 32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: v_writelane_b32 v41, s30, 0 ; CHECK-NEXT: v_writelane_b32 v41, s31, 1 ; CHECK-NEXT: v_writelane_b32 v41, s34, 2 @@ -96,6 +563,7 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll index bcccf50e3805c..c05eef51c276f 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll @@ -287,10 +287,10 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 +; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo ; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 -; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 @@ -327,10 +327,10 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 +; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo ; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 -; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll index c5db7a33f70e0..ed767aeaf112f 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -1049,12 +1049,12 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1078,12 +1078,12 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: 
s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1107,13 +1107,14 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1136,13 +1137,14 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: 
v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1171,15 +1173,15 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s34 +; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1205,15 +1207,15 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 +; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1239,19 +1241,20 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-SDAG-NEXT: s_mov_b32 s5, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 -; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 ; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 ; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 ; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 @@ -1261,31 +1264,33 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0xfff ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 10 ; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s5 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: 
test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 ; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 ; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 @@ -1296,12 +1301,13 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 10 ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 ; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 ; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, i32 %n, align 128, addrspace(5) store volatile i32 10, ptr addrspace(5) %alloca @@ -1312,12 +1318,12 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; 
GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1341,12 +1347,12 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1370,13 +1376,14 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1399,13 +1406,14 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1434,13 +1442,13 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1464,13 +1472,13 @@ define void 
@test_dynamic_stackalloc_device_divergent() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1494,14 +1502,14 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -1525,14 +1533,14 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; 
GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1566,6 +1574,7 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX9-SDAG-NEXT: s_mov_b32 s10, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s34 +; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 @@ -1575,7 +1584,6 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0 -; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GFX9-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[4:5] ; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 @@ -1598,16 +1606,16 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 +; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-GISEL-NEXT: 
s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-GISEL-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1633,19 +1641,20 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 +; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 ; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo ; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0xfff ; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff000 -; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1657,28 +1666,29 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-SDAG-NEXT: ; %bb.2: ; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s0 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc -; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 ; GFX11-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_2) ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s0 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s6 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1693,12 +1703,13 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 ; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 ; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %idx = call i32 
@llvm.amdgcn.workitem.id.x() %alloca = alloca i32, i32 %idx, align 128, addrspace(5) @@ -1710,13 +1721,13 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1740,13 +1751,13 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1770,14 +1781,14 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, 
s32 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -1801,14 +1812,14 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1842,10 +1853,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_mov_b32 s13, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-SDAG-NEXT: s_mov_b32 s14, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; 
GFX9-SDAG-NEXT: s_mov_b32 s8, 0 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x3000 ; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-SDAG-NEXT: s_cbranch_execz .LBB14_6 @@ -1925,10 +1936,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-GISEL-NEXT: s_mov_b32 s13, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-GISEL-NEXT: s_mov_b32 s14, s34 -; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x3000 ; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-GISEL-NEXT: s_cbranch_execz .LBB14_6 @@ -2007,10 +2018,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_mov_b32 s7, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 +; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0xc0 ; GFX11-SDAG-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-SDAG-NEXT: s_cbranch_execz .LBB14_6 @@ -2092,10 +2103,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-GISEL-NEXT: s_mov_b32 s7, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s8, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0xc0 ; GFX11-GISEL-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-GISEL-NEXT: 
s_cbranch_execz .LBB14_6 @@ -2192,10 +2203,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_mov_b32 s11, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-SDAG-NEXT: s_mov_b32 s12, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 -; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x2000 ; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -2258,10 +2269,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-GISEL-NEXT: s_mov_b32 s11, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-GISEL-NEXT: s_mov_b32 s12, s34 -; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 -; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x2000 ; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -2324,10 +2335,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s0, exec_lo ; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 +; GFX11-SDAG-NEXT: s_mov_b32 s0, exec_lo ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x80 ; GFX11-SDAG-NEXT: v_cmpx_ne_u32_e32 0, v0 ; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -2393,10 +2404,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-GISEL-NEXT: s_mov_b32 s5, s33 ; GFX11-GISEL-NEXT: s_add_i32 
s33, s32, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s6, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo ; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x80 ; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v0 ; GFX11-GISEL-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -2476,13 +2487,13 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2506,13 +2517,13 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, 
s[4:5] @@ -2536,15 +2547,15 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.h, 0 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l -; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0 ; GFX11-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -2568,14 +2579,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -2605,12 +2616,12 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2634,12 +2645,12 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2663,13 +2674,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: 
s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -2692,13 +2704,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index dafd6cce2d878..15ef61fd75bad 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -21,21 +21,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, 
$sgpr32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 12, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -54,21 +66,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: 
s_add_i32__fi_offset0__inline_imm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 12, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -88,21 +112,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, 
implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 12, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7 @@ -121,21 +157,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, 
$sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 68, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -154,21 +202,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, 
implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__literal - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 68, implicit-def $scc SI_RETURN implicit $sgpr7 @@ -188,21 +248,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: 
s_add_i32__literal__fi_offset96 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -222,21 +294,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 
32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -258,6 +342,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -266,6 +353,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 
= S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -274,6 +364,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -281,6 +374,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -304,6 +400,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable 
$sgpr7 @@ -312,6 +411,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -320,6 +422,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -327,6 +432,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -351,6 +459,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW64: liveins: 
$sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc @@ -359,6 +470,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc @@ -367,6 +481,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -374,6 +491,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; 
FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -398,6 +518,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc @@ -406,6 +529,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc @@ -414,6 +540,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; 
FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -421,6 +550,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -750,6 +882,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc @@ -758,6 +893,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} 
+ ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc @@ -766,6 +904,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -773,6 +914,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -851,6 +995,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8 ; 
MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc @@ -859,6 +1006,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc @@ -867,6 +1017,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -874,6 +1027,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: 
$sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -896,21 +1052,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 
40, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -930,21 +1098,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; 
MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -1061,21 +1241,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__0__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__0__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__0__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__0__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 0, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -1094,21 +1286,33 @@ machineFunctionInfo: body: | bb.0: ; 
MUBUFW64-LABEL: name: s_add_i32__fi_offset0__0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__0 - ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__0 - ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY 
$sgpr32 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -1130,6 +1334,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr7 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1138,6 +1345,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr7 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1146,6 +1356,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr7 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; 
FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -1153,6 +1366,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr7 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -1176,6 +1392,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1184,6 +1403,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1192,6 +1414,9 @@ body: | ; FLATSCRW64-LABEL: name: 
s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -1199,6 +1424,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -1222,6 +1450,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1230,6 +1461,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW32: liveins: 
$sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1238,6 +1472,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 @@ -1245,6 +1482,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 @@ -1488,6 +1728,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: 
{{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 @@ -1496,6 +1739,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 @@ -1504,6 +1750,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 @@ -1511,6 +1760,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 
0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 @@ -1536,6 +1788,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc @@ -1544,6 +1799,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc @@ -1552,6 +1810,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 
0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 @@ -1559,6 +1820,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 @@ -1583,6 +1847,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; MUBUFW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUFW64-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi @@ -1591,6 +1857,8 @@ body: | ; MUBUFW32-LABEL: name: 
s_add_i32_use_dst_reg_as_temp_regression ; MUBUFW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUFW32-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi @@ -1599,6 +1867,8 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; FLATSCRW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; FLATSCRW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, 
implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 @@ -1606,6 +1876,8 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; FLATSCRW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; FLATSCRW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 @@ -1629,24 +1901,36 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; MUBUFW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, 
$sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit 
$sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; MUBUFW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; MUBUFW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: 
renamable $sgpr7 = COPY renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; FLATSCRW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, 
implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; FLATSCRW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW32-NEXT: {{ $}} + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit 
$sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, %stack.0, implicit-def dead $scc @@ -1670,24 +1954,36 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; MUBUFW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit 
$sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; MUBUFW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; MUBUFW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, 
$sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit 
$sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; FLATSCRW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit 
$sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; FLATSCRW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, 
$sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, 
implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, %stack.1, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir index 7f370b2cca658..d3a8d983bb22c 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir @@ -33,6 +33,214 @@ body: | ; GFX8-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX8: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr101 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr165 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr89 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX8-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX8-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX8-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -58,6 +266,214 @@ body: | ; GFX900-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX900: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + 
; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr244 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 
+ ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX900-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX900-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX900-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -83,6 +499,246 @@ body: | ; GFX90A-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX90A: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr66 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX90A-NEXT: renamable $sgpr17 = 
S_MOV_B32 0 ; GFX90A-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -108,6 +764,214 @@ body: | ; GFX1010-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1010: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; 
GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1010-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1010-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + 
; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; 
GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1010-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1010-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1010-NEXT: undef 
renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -133,6 +997,214 @@ body: | ; GFX1100-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1100: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; 
GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1100-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1100-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + 
; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; 
GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1100-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1100-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1100-NEXT: undef 
renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -158,6 +1230,214 @@ body: | ; GFX1200-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1200: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; 
GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + 
; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; 
GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1200-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1200-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1200-NEXT: undef 
renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -216,6 +1496,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -227,6 +1510,9 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -237,6 +1523,9 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -247,6 +1536,9 @@ body: | ; GFX1010-LABEL: name: 
materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1010-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1010-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1010-NEXT: $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec @@ -257,6 +1549,9 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1100-NEXT: $sgpr0 = S_ADDC_U32 $sgpr32, 64, implicit-def $scc, implicit $scc @@ -268,6 +1563,9 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1200-NEXT: $sgpr0 = S_ADDC_U32 $sgpr32, 64, implicit-def $scc, implicit $scc @@ -300,6 +1598,57 @@ body: | ; GFX8-LABEL: name: 
materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -361,6 +1710,57 @@ body: | ; GFX900-LABEL: name: 
materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, 
addrspace 5) @@ -421,6 +1821,73 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec @@ -481,6 +1948,57 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1010-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1010-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr39 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -539,6 +2057,57 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + 
; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) @@ -598,6 +2167,57 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; 
GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) @@ -694,6 +2314,54 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -721,6 +2389,54 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; 
GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr63 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -747,6 +2463,54 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -773,6 +2537,54 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + 
; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -799,6 +2611,54 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; 
GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -826,6 +2686,54 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; 
GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -889,6 +2797,54 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def 
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -916,6 +2872,54 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -942,6 +2946,54 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -968,6 +3020,54 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; 
GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -994,6 +3094,54 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; 
GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1100-NEXT: 
S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -1021,6 +3169,54 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir 
b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir index aecff1b13171d..48f1ab0ee3c30 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir @@ -21,21 +21,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 12, killed $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 12, killed $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN 
implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_OR_B32 12, %stack.0, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -55,24 +67,36 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_or_b32__literal__fi_offset96 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_or_b32__literal__fi_offset96 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN 
implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_or_b32__literal__fi_offset96 - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_or_b32__literal__fi_offset96 - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_OR_B32 68, %stack.1, implicit-def $scc @@ -96,6 +120,9 @@ body: | ; MUBUFW64-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -104,6 +131,9 @@ body: | ; MUBUFW32-LABEL: name: 
s_or_b32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -112,6 +142,9 @@ body: | ; FLATSCRW64-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -119,6 +152,9 @@ body: | ; FLATSCRW32-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -143,6 +179,9 @@ body: | ; MUBUFW64-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + 
; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -151,6 +190,9 @@ body: | ; MUBUFW32-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -159,6 +201,9 @@ body: | ; FLATSCRW64-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -166,6 +211,9 @@ body: | ; FLATSCRW32-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 
+ ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -190,6 +238,9 @@ body: | ; MUBUFW64-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -198,6 +249,9 @@ body: | ; MUBUFW32-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -206,6 +260,9 @@ body: | ; FLATSCRW64-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -213,6 +270,9 @@ body: | ; FLATSCRW32-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir index 2a4b305f32cef..fd296666514ad 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir @@ -16,11 +16,17 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; 
FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0, implicit $sgpr0 @@ -39,12 +45,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc - ; MUBUFW32: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc - ; FLATSCRW32: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: renamable 
$vgpr0, $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec @@ -64,12 +76,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -89,12 +107,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; MUBUFW32: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; FLATSCRW32: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -118,6 +142,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed 
$vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -125,6 +152,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -149,6 +179,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -156,6 +189,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: 
renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -181,6 +217,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -188,6 +227,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -213,6 +255,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -220,6 +265,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -245,6 +293,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -252,6 +303,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -306,6 +360,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; MUBUFW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -314,6 +371,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; FLATSCRW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 
32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index ae53a3696fc2b..95d9f226c4634 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -22,12 +22,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec @@ -47,13 +53,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -75,12 +87,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: 
v_add_co_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec @@ -101,13 +119,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit 
$exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -128,12 +152,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.0, implicit-def dead $vcc, implicit $exec @@ -153,13 +183,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -181,12 +217,18 @@ machineFunctionInfo: body: | bb.0: ; 
MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.1, implicit-def dead $vcc, implicit $exec @@ -207,13 +249,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed 
$vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -237,6 +285,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_offset0 ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -244,6 +295,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec @@ -266,6 +320,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__vgpr ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -273,6 +330,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr1, implicit-def dead $vcc, implicit $exec @@ -296,6 +356,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -304,6 +367,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -328,6 +394,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -336,6 +405,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 
0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -360,6 +432,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -368,6 +443,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -389,13 +467,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: 
v_add_co_u32_e64__inline_imm__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec @@ -415,13 +499,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable 
$vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; FLATSCRW64: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -441,13 +531,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; FLATSCRW64: renamable 
$vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -471,6 +567,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX7: liveins: $sgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -480,6 +579,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX8: liveins: $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -489,6 +591,9 @@ body: | ; GFX900-LABEL: name: 
v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX900: liveins: $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -498,6 +603,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX90A: liveins: $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -507,6 +615,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX10: liveins: $sgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -515,6 +626,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX942: liveins: $sgpr8 ; GFX942-NEXT: {{ $}} 
+ ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -524,6 +638,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX11: liveins: $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -531,6 +648,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX12: liveins: $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -555,6 +675,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX7: liveins: $sgpr8 ; GFX7-NEXT: {{ $}} + ; 
GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -564,6 +687,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX8: liveins: $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -573,6 +699,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX900: liveins: $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -582,6 +711,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX90A: liveins: $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -591,6 +723,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX10: liveins: $sgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -599,6 +734,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX942: liveins: $sgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -608,6 +746,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX11: liveins: $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 
32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -615,6 +756,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX12: liveins: $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -640,6 +784,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -649,6 +796,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; 
GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -658,6 +808,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -667,6 +820,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -676,6 +832,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable 
$vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -684,6 +843,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -692,6 +854,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 @@ -699,6 +864,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, dead renamable 
$vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 @@ -724,6 +892,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -733,6 +904,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -742,6 +916,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed 
$vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -751,6 +928,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -760,6 +940,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -768,6 +951,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; 
GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -776,6 +962,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 @@ -783,6 +972,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 @@ -808,6 +1000,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed 
$vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -817,6 +1012,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -826,6 +1024,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -835,6 +1036,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -844,6 +1048,9 @@ body: | ; 
GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -852,6 +1059,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -860,6 +1070,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vcc @@ -867,6 +1080,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -1081,6 +1297,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1093,6 +1312,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 
0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1105,6 +1327,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1117,6 +1342,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1129,6 +1357,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX10: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX10-NEXT: 
{{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1140,6 +1371,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1152,6 +1386,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = 
V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1160,6 +1397,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1188,6 +1428,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, 
killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1199,6 +1442,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1230,6 +1476,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1241,6 +1490,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required ; FLATSCRW64: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; 
FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr8, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1798,6 +2050,11 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; MUBUFW64: liveins: $sgpr4 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1805,6 +2062,11 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX942: liveins: $sgpr4 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX942-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr5 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: SI_RETURN implicit $vgpr0 @@ -1812,12 +2074,22 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX11: liveins: $sgpr4 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $sgpr4, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX12: liveins: $sgpr4 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $sgpr4, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1875,6 +2147,11 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_func ; MUBUFW64: liveins: $vgpr0 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $vgpr0, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1882,6 +2159,11 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_func ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $vgpr0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $vgpr0, 0, implicit $exec @@ -2182,11 +2464,19 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = 
frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2194,17 +2484,26 @@ body: | ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: 
$sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2212,102 +2511,157 @@ body: | ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = 
frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX10: liveins: $sgpr4, $sgpr5 ; 
GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX942-NEXT: 
$sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = 
frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable 
$sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2332,11 +2686,19 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2344,17 +2706,26 @@ body: | ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION 
def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2362,105 +2733,160 @@ body: | ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION 
register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, 
implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; 
GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0, 
implicit $sgpr8_sgpr9 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2485,11 +2911,17 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2497,17 +2929,24 @@ body: | ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: $sgpr4 = frame-setup 
COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2515,102 +2954,145 @@ body: | ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: 
$sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION 
def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + 
; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = 
frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2635,11 +3117,17 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288 @@ -2647,17 +3135,24 @@ body: | ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288 @@ -2665,105 +3160,148 @@ body: | ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; 
GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: 
$sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 
0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed 
$sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc 
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir index c5c9696ee355a..3b1ad0cf28e58 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir @@ -18,22 +18,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, 
implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.0, implicit $exec @@ -54,22 +66,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed 
$vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.1, implicit $exec @@ -89,22 +113,34 @@ 
machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 
0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 68, %stack.0, implicit $exec @@ -125,22 +161,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 68, %stack.1, implicit $exec @@ -163,6 +211,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -170,6 +221,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; 
MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -177,12 +231,18 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, %stack.0, implicit $exec @@ -205,6 +265,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -212,6 +275,9 @@ body: | ; MUBUFW32-LABEL: name: 
v_add_u32_e32__fi_offset0__vgpr ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -219,12 +285,18 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, $vgpr1, implicit $exec @@ -248,6 +320,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -256,6 +331,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -264,6 +342,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -271,6 +352,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -295,6 +379,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -303,6 +390,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -311,6 +401,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: 
renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -318,6 +411,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -342,6 +438,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; MUBUF: liveins: $sgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -350,6 +449,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed 
$vgpr1, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -358,6 +460,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -366,6 +471,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -387,21 +495,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable 
$vgpr0 = V_ADD_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -424,6 +544,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; 
MUBUF: liveins: $sgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -433,6 +556,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -441,6 +567,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -450,6 +579,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -474,6 +606,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -483,6 +618,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 0, implicit $exec @@ -491,6 +629,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; 
FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -499,6 +640,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -523,6 +667,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -532,6 +679,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 1, implicit $exec @@ -540,6 +690,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -548,6 +701,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -572,6 +728,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 
32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -581,6 +740,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -589,6 +751,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -597,6 +762,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1902,49 +2070,70 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; MUBUF: liveins: $sgpr4, $sgpr5 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; MUBUF-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; MUBUFW32: liveins: $sgpr4, $sgpr5 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc ; MUBUFW32-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; MUBUFW32-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUFW32-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; MUBUFW32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; FLATSCRW64: liveins: $sgpr4, $sgpr5 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 
4294959104, implicit-def dead $scc ; FLATSCRW64-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; FLATSCRW64-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCRW64-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr6, 0, implicit $exec ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCRW64-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; FLATSCRW64-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index 81bd8baaa0e5d..6d54bb544fb8c 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -21,6 +21,9 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_divergent_i32 ; GCN: liveins: $vgpr31, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -55,6 +58,10 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 $sgpr0, 4, implicit-def dead $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec @@ -91,6 +98,12 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr0, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc @@ -132,6 +145,10 @@ body: | ; GCN-LABEL: name: func_other_fi_user_non_inline_imm_offset_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 7, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec ; GCN-NEXT: $sgpr5 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc @@ -168,6 +185,12 @@ body: | 
; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32 ; GCN: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec @@ -204,6 +227,9 @@ body: | ; GCN-LABEL: name: func_frame_idx_at_the_end_of_bb ; GCN: liveins: $vgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -223,7 +249,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.0 S_ENDPGM 0, implicit $sgpr4 @@ -244,6 +273,9 @@ body: | ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc 
; GCN: liveins: $sgpr4, $sgpr5 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec @@ -266,7 +298,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 64, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.1 @@ -285,7 +320,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.1 @@ -308,6 +346,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64 @@ -318,6 +359,9 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec @@ -327,6 +371,9 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec @@ -353,6 +400,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68 @@ -363,6 +413,9 @@ body: | ; 
GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec @@ -372,6 +425,9 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec @@ -401,6 +457,49 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) @@ -461,6 +560,49 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) @@ -520,6 +662,65 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec @@ -615,6 +816,57 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + 
; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -675,6 +927,57 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -734,6 +1037,73 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index 831d10480c51c..381b1741517b7 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -1727,12 +1727,12 @@ 
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-NEXT: v_writelane_b32 v40, s19, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s19, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 @@ -1759,13 +1759,13 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX11-NEXT: s_or_saveexec_b32 s16, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s16 +; GFX11-NEXT: v_writelane_b32 v40, s3, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s3, 2 -; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2132,6 +2132,7 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[40:41] +; GFX9-NEXT: v_writelane_b32 v40, s29, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, s28 ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 ; GFX9-NEXT: v_mov_b32_e32 v5, s27 @@ 
-2140,13 +2141,13 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: v_mov_b32_e32 v2, s24 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32 -; GFX9-NEXT: v_writelane_b32 v40, s29, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, s23 ; GFX9-NEXT: v_mov_b32_e32 v4, s22 ; GFX9-NEXT: v_mov_b32_e32 v3, s21 ; GFX9-NEXT: v_mov_b32_e32 v2, s20 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v3, s17 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 ; GFX9-NEXT: s_getpc_b64 s[16:17] @@ -2155,7 +2156,6 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v5, s19 ; GFX9-NEXT: v_mov_b32_e32 v4, s18 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2178,6 +2178,7 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: s_or_saveexec_b32 s26, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s26 +; GFX11-NEXT: v_writelane_b32 v40, s25, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21 ; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19 @@ -2187,20 +2188,18 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17 ; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3 ; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0 -; GFX11-NEXT: v_writelane_b32 v40, s25, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23 ; 
GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v10, s0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32 ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16 ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 234eaa8af7edf..3ca36a97981f2 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -162,13 +162,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 @@ -191,15 +191,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -221,13 +220,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 @@ -253,9 +252,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi @@ -284,13 +283,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 @@ -315,13 +314,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 @@ -346,13 +345,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; 
GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 @@ -382,9 +381,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi @@ -413,13 +412,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 @@ -444,13 +443,13 @@ 
define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 @@ -475,13 +474,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 @@ -540,11 +539,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 
s35, external_void_func_i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -568,11 +567,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -596,11 +595,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -625,11 +624,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -656,9 +655,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo @@ -685,13 +684,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -714,13 +713,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-TRUE16-NEXT: 
s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: global_load_d16_i8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -743,13 +742,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: global_load_i8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -773,13 +772,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s0, 2 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -807,9 +806,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo @@ -836,13 +835,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 @@ -865,13 +864,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -894,13 +893,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -924,13 +923,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte 
Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -987,11 +986,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1015,11 +1014,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, 
s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1043,11 +1042,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1072,11 +1071,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1103,9 +1102,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo @@ -1132,13 +1131,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1161,13 +1160,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1190,13 +1189,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; 
GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1220,13 +1219,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1254,9 +1253,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: 
v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo @@ -1283,13 +1282,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1312,13 +1311,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], 
s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1341,13 +1340,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1371,13 +1370,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1434,11 +1433,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1462,11 +1461,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1491,11 +1490,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1552,11 +1551,11 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1581,10 +1580,10 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1610,11 +1609,11 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1642,10 +1641,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo @@ -1672,14 +1671,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1702,14 +1701,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1733,14 +1732,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1800,11 +1799,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -1831,9 +1830,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; 
GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -1861,11 +1860,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 @@ -1895,10 +1894,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo @@ -1927,14 +1926,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, 
s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1959,14 +1958,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1990,14 +1989,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 
0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2029,10 +2028,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo @@ -2063,14 +2062,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: v_mov_b32_e32 v6, 3 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo @@ -2097,13 +2096,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2129,14 +2128,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo @@ -2198,11 +2197,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2226,11 +2225,11 @@ define 
amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2254,11 +2253,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2283,11 +2282,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 
0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2343,11 +2342,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2371,11 +2370,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2400,11 +2399,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2461,11 +2460,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -2490,10 +2489,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2519,11 +2518,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2582,11 +2581,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2612,10 +2611,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2642,11 +2641,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2708,11 +2707,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo @@ -2740,9 +2739,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo @@ -2771,11 +2770,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo @@ -2836,11 +2835,11 @@ define amdgpu_gfx 
void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -2865,10 +2864,10 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2894,11 +2893,11 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2958,11 +2957,11 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -2989,9 +2988,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -3019,11 +3018,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 @@ -3087,11 +3086,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 
0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi @@ -3120,9 +3119,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo @@ -3151,11 +3150,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi @@ -3187,10 +3186,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; 
GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi @@ -3220,15 +3219,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -3253,14 +3252,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off 
-; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 @@ -3286,14 +3285,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 @@ -3320,15 +3319,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: 
s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -3358,10 +3357,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo @@ -3391,14 +3390,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3424,14 +3423,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: 
s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3457,14 +3456,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3495,10 +3494,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo @@ -3529,14 +3528,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3563,14 +3562,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3597,14 +3596,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3636,10 +3635,10 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo @@ -3672,14 +3671,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -3708,14 +3707,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; 
GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -3744,14 +3743,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -3785,10 +3784,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 
v[0:1], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo @@ -3824,14 +3823,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -3863,14 +3862,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -3901,14 +3900,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill 
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -3945,14 +3944,14 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -4016,17 +4015,17 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: 
v_mov_b32_e32 v5, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -4088,15 +4087,15 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -4155,17 +4154,17 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s1, external_void_func_v32i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) @@ -4233,12 +4232,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ubyte v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo @@ -4269,16 +4268,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo ; 
GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_byte v[40:41], v0, off @@ -4305,17 +4304,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: global_store_b8 v[40:41], v0, off @@ -4341,17 +4340,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 
0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: global_store_b8 v[40:41], v0, off @@ -4378,16 +4377,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_byte v[40:41], v0, off @@ -4421,12 +4420,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, 
s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ushort v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi @@ -4462,16 +4461,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo ; GFX10-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4503,17 +4502,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 
s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 @@ -4547,17 +4546,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 @@ -4591,16 +4590,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: 
v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -4639,12 +4638,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo @@ -4683,16 +4682,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: v_writelane_b32 
v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4727,17 +4726,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4774,17 +4773,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: 
v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4822,16 +4821,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4873,12 +4872,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword 
v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo @@ -4918,16 +4917,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4963,17 +4962,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 
0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -5011,17 +5010,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -5064,16 +5063,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 
4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -5116,12 +5115,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo @@ -5166,16 +5165,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: 
v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -5216,17 +5215,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -5269,17 +5268,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -5327,16 +5326,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] @@ -5384,12 +5383,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: 
buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo @@ -5439,16 +5438,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -5494,17 +5493,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -5552,17 +5551,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -5617,16 +5616,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 @@ -5679,6 +5678,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v44, s34, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -5689,7 +5689,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v43, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX9-NEXT: v_writelane_b32 v44, s34, 2 ; GFX9-NEXT: v_writelane_b32 v44, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo @@ -5801,6 +5800,7 @@ define amdgpu_gfx void 
@test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v44, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -5809,13 +5809,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-NEXT: v_writelane_b32 v44, s34, 2 +; GFX10-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -5924,6 +5923,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 @@ -5932,12 +5932,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 2 +; 
GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) @@ -6051,6 +6050,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 @@ -6059,12 +6059,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX11-FAKE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) @@ -6203,6 +6202,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:12 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:4 ; 4-byte Folded Spill @@ -6211,13 +6211,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) @@ -6334,8 +6333,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo @@ -6362,12 +6361,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dword 
v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6390,12 +6389,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6419,12 +6418,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -6452,8 +6451,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo @@ -6480,12 +6479,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6508,12 +6507,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX11-NEXT: s_add_i32 
s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6537,12 +6536,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -6570,8 +6569,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo @@ -6598,12 +6597,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dwordx2 
v[0:1], v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6626,12 +6625,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6655,12 +6654,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 @@ -6718,11 +6717,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6747,10 +6746,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6776,11 +6775,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ 
-6838,11 +6837,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6867,11 +6866,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6897,11 +6896,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 
@@ -6929,8 +6928,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo @@ -6957,12 +6956,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6985,12 +6984,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7014,12 +7013,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7077,11 +7076,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -7106,11 +7105,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_v4i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7136,11 +7135,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7168,8 +7167,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo @@ -7196,12 +7195,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v2f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7224,12 +7223,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7253,12 +7252,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7286,8 +7285,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { 
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo @@ -7314,12 +7313,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7342,12 +7341,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 
@@ -7371,12 +7370,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7434,11 +7433,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -7463,10 +7462,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7492,11 +7491,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7555,11 +7554,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -7585,10 +7584,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_mov_b32_e32 v2, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo ; GFX11-NEXT: 
s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -7615,11 +7614,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -7680,11 +7679,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: v_mov_b32_e32 v3, 6 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -7711,9 +7710,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -7741,11 
+7740,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 @@ -7775,8 +7774,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo @@ -7803,12 +7802,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 @@ -7831,12 +7830,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7860,12 +7859,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7925,11 +7924,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: 
v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -7956,9 +7955,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -7986,11 +7985,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 @@ -8053,11 +8052,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: 
v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo @@ -8085,9 +8084,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo @@ -8116,11 +8115,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo @@ -8156,12 +8155,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8186,12 +8185,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -8219,12 +8219,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1] ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -8253,12 +8253,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] ; 
GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -8325,11 +8326,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-NEXT: v_mov_b32_e32 v6, 7 @@ -8360,9 +8361,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi @@ -8392,11 +8393,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7 @@ -8435,6 +8436,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 @@ -8442,7 +8444,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8467,6 +8468,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x3 @@ -8474,7 +8476,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -8502,6 +8504,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; 
GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v12, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x3 @@ -8509,7 +8512,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: global_load_b128 v[4:7], v12, s[0:1] offset:16 ; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32 ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -8538,6 +8540,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x3 @@ -8545,7 +8548,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -8581,6 +8584,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8593,7 +8597,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8618,6 +8621,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 @@ -8629,7 +8633,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -8657,6 +8661,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v28, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 @@ -8668,7 +8673,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: global_load_b128 v[20:23], v28, 
s[0:1] offset:80 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -8697,6 +8701,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x7 @@ -8708,7 +8713,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -8744,6 +8749,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: global_load_dword v32, v[0:1], off ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8754,7 +8760,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 
v[28:31], v28, s[34:35] offset:112 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -8784,6 +8789,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -8796,7 +8802,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -8826,6 +8832,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v28, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v32, v[0:1], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -8838,7 +8845,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX11-NEXT: 
s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -8868,6 +8874,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -8880,7 +8887,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -8953,14 +8960,14 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-NEXT: v_writelane_b32 v42, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[40:41], v0, off ; 
GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -8991,9 +8998,9 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v41, v1 :: v_dual_mov_b32 v40, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 -; GFX11-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -9027,14 +9034,14 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_dword v[40:41], v0, off ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 @@ -9071,12 +9078,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 ; GFX9-NEXT: 
s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -9101,12 +9108,13 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -9134,8 +9142,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[0:1] @@ -9167,12 +9175,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, 
s[0:1] ; GFX11-FAKE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 @@ -9201,12 +9209,13 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -9237,8 +9246,8 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -9272,9 +9281,9 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi 
; GFX10-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 @@ -9304,9 +9313,9 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 @@ -9336,9 +9345,9 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 @@ -9370,9 +9379,9 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 
s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33 @@ -9408,10 +9417,10 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -9449,19 +9458,19 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; 
GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 @@ -9685,8 +9694,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] @@ -9736,8 +9745,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1] @@ -9784,8 +9793,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] @@ -10112,13 +10121,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo 
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 @@ -10141,15 +10150,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -10171,13 +10179,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 @@ -10234,11 +10242,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -10264,11 +10272,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10295,11 +10303,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; 
GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10359,11 +10367,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -10389,11 +10397,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10420,11 +10428,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10484,11 +10492,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -10514,11 +10522,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 42 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10545,11 +10553,11 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 
3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10612,11 +10620,11 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -10645,11 +10653,11 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -10679,11 +10687,11 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -10753,9 +10761,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -10790,9 +10798,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -10828,9 +10836,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: 
s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -10909,11 +10917,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -10948,11 +10956,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -10988,11 +10996,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v2i64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -11074,9 +11082,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -11117,9 +11125,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -11161,9 +11169,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -11260,9 +11268,9 @@ define amdgpu_gfx void 
@test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -11309,9 +11317,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -11359,9 +11367,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -11444,11 +11452,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 
v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x4400 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -11474,11 +11482,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x4400 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -11505,11 +11513,11 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -11569,11 +11577,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 4.0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -11599,11 +11607,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 4.0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -11630,11 +11638,11 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -11697,11 +11705,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -11730,11 +11738,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -11764,11 +11772,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -11837,11 +11845,11 @@ 
define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -11873,11 +11881,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -11910,11 +11918,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -11992,11 +12000,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -12034,11 +12042,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -12077,11 +12085,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -12156,11 +12164,11 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -12189,11 +12197,11 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0x40100000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -12223,11 +12231,11 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo -; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -12299,11 +12307,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -12338,11 +12346,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -12378,11 +12386,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v2f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -12466,11 +12474,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -12511,11 +12519,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -12557,11 +12565,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s0, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -12636,11 +12644,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -12666,11 +12674,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12697,11 +12705,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12764,8 +12772,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi @@ -12796,8 +12804,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi @@ -12829,8 +12837,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi @@ -12898,8 +12906,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi @@ -12930,8 +12938,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi @@ -12963,8 +12971,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi @@ -13033,11 +13041,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: 
s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -13066,11 +13074,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x20001 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 3 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -13100,11 +13108,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -13170,11 +13178,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -13203,11 +13211,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_movk_i32 s5, 0x4400 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -13237,11 +13245,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s30, 2 @@ -13306,8 +13314,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi @@ -13338,8 +13346,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi @@ -13371,8 +13379,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi @@ -13441,11 +13449,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 
0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -13474,11 +13482,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x20001 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0x40003 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -13508,11 +13516,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -13575,11 +13583,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, 
s4, 0 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -13605,11 +13613,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13636,11 +13644,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13703,8 +13711,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_addk_i32 s32, 
0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi @@ -13735,8 +13743,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi @@ -13768,8 +13776,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi @@ -13838,11 +13846,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -13871,11 +13879,11 @@ 
define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -13905,11 +13913,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 @@ -13978,11 +13986,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 
v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 4 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -14014,11 +14022,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 4 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -14051,11 +14059,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -14130,11 +14138,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: 
v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 4 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -14169,11 +14177,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 4 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -14209,11 +14217,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -14288,8 +14296,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 
; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -14324,8 +14332,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -14361,8 +14369,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -14441,11 +14449,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -14480,11 +14488,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -14520,11 +14528,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -14605,11 +14613,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -14647,11 +14655,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; 
GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -14690,11 +14698,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -14782,9 +14790,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -14828,9 +14836,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 
4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -14875,9 +14883,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -14978,11 +14986,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 @@ -15029,11 +15037,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; 
GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -15081,11 +15089,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -15198,9 +15206,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 18 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -15260,9 +15268,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 18 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, 
s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -15323,9 +15331,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -15497,9 +15505,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -15604,11 +15612,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -15707,10 +15715,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -15928,9 +15936,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -16040,11 +16048,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s3, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -16147,10 +16155,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -16263,9 +16271,9 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi @@ -16296,10 +16304,11 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo @@ -16307,7 +16316,6 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 @@ -16330,12 +16338,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 @@ -16360,12 +16368,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 @@ -16395,13 +16403,13 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; 
GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -16657,6 +16665,7 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -16671,7 +16680,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -16732,18 +16740,18 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-NEXT: v_mov_b32_e32 v2, 10 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_mov_b32_e32 v3, 14 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 11 ; GFX10-NEXT: v_mov_b32_e32 v1, 12 ; GFX10-NEXT: v_mov_b32_e32 v2, 13 -; GFX10-NEXT: v_mov_b32_e32 v3, 14 ; GFX10-NEXT: v_mov_b32_e32 
v4, 15 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 @@ -16941,6 +16949,7 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -16955,7 +16964,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -17016,18 +17024,18 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000 -; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 @@ -17258,10 +17266,10 @@ define amdgpu_gfx void 
@test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17285,10 +17293,10 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17313,10 +17321,10 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17372,10 +17380,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 
exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17399,10 +17407,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17427,10 +17435,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17486,10 +17494,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 
0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17513,10 +17521,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17541,10 +17549,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17600,10 +17608,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 
s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17627,10 +17635,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17655,10 +17663,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17714,10 +17722,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17741,10 +17749,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17769,10 +17777,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17828,10 +17836,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, 
s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -17855,10 +17863,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17883,10 +17891,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -17942,10 +17950,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 @@ -17969,10 +17977,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -17997,10 +18005,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18056,10 +18064,10 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18083,10 +18091,10 @@ define amdgpu_gfx void 
@test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18111,10 +18119,10 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18170,10 +18178,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18197,10 +18205,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX11-NEXT: 
scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18225,10 +18233,10 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18284,10 +18292,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18311,10 +18319,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18339,10 +18347,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18398,10 +18406,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18425,10 +18433,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18453,10 +18461,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18512,10 +18520,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18539,10 +18547,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_v4bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18567,10 +18575,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18626,10 +18634,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18653,10 +18661,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_v8bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18681,10 +18689,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -18740,10 +18748,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -18767,10 +18775,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 
-; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -18795,10 +18803,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 124de7e00f020..77c34b69820ce 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -49,9 +49,9 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 @@ -83,9 +83,9 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; 
GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 @@ -246,10 +246,10 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: ;;#ASMSTART @@ -283,10 +283,10 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: ;;#ASMSTART @@ -362,16 +362,16 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 +; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX10-NEXT: v_writelane_b32 v41, s30, 0 
+; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v40, v31 -; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v40 ; GFX10-NEXT: ;;#ASMSTART @@ -399,18 +399,18 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 +; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: v_writelane_b32 v41, s30, 0 +; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v31 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v40, v31 -; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v31, v40 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v31 @@ -480,15 +480,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 @@ -517,15 +517,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s33 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, s33 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: s_mov_b32 s4, s33 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_mov_b32 s33, s4 @@ -597,15 +597,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, s34 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s34, s4 @@ -634,15 +634,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 
s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s34 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, s34 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: s_mov_b32 s4, s34 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_mov_b32 s34, s4 @@ -712,15 +712,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 +; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 @@ -747,15 +747,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 +; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: v_writelane_b32 v41, s30, 0 +; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v41, 
s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v40 @@ -870,10 +870,10 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -897,10 +897,10 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -955,10 +955,10 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -982,10 +982,10 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11-NEXT: 
scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1049,15 +1049,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART @@ -1085,15 +1085,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, s40 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: s_mov_b32 s4, s40 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; 
GFX11-NEXT: ;;#ASMSTART @@ -1172,11 +1172,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 3 +; GFX10-NEXT: v_writelane_b32 v41, s4, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX10-NEXT: v_writelane_b32 v41, s4, 0 +; GFX10-NEXT: v_writelane_b32 v41, s30, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -1185,7 +1186,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v40, v32 -; GFX10-NEXT: v_writelane_b32 v41, s30, 1 ; GFX10-NEXT: v_writelane_b32 v41, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART @@ -1217,11 +1217,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 3 +; GFX11-NEXT: v_writelane_b32 v41, s4, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: v_writelane_b32 v41, s4, 0 +; GFX11-NEXT: v_writelane_b32 v41, s30, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND @@ -1230,7 +1231,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: ; def v32 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v40, v32 -; GFX11-NEXT: v_writelane_b32 v41, s30, 1 ; GFX11-NEXT: v_writelane_b32 v41, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART diff --git 
a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index b750d28ffa7d3..3110bbae6101c 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -2142,12 +2142,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: s_mov_b32 s38, s34 +; GFX9-NEXT: s_mov_b32 s34, s32 ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi ; GFX9-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_mov_b32 s38, s34 -; GFX9-NEXT: s_mov_b32 s34, s32 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] @@ -2172,12 +2172,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s36 +; GFX10-NEXT: s_mov_b32 s38, s34 +; GFX10-NEXT: s_mov_b32 s34, s32 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi ; GFX10-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo -; GFX10-NEXT: s_mov_b32 s38, s34 -; GFX10-NEXT: s_mov_b32 s34, s32 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] @@ -2203,12 +2203,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s36, s34 +; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: 
v_mov_b32_e32 v0, s33 ; GFX11-NEXT: s_mov_b32 s1, return_512xi32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, return_512xi32@abs32@lo -; GFX11-NEXT: s_mov_b32 s36, s34 -; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: s_addk_i32 s32, 0x1800 ; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2910,9 +2910,9 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s36 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_mov_b32 s38, s34 ; GFX10-NEXT: s_mov_b32 s34, s32 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x14000 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill @@ -2929,6 +2929,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v63, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -2971,13 +2973,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v63, s30, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 +; GFX10-NEXT: 
v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v6, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 @@ -3189,6 +3189,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s36, s34 +; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: s_mov_b32 s1, s0 @@ -3196,8 +3198,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_mov_b32 s36, s34 -; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: s_addk_i32 s32, 0xa00 ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44 diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll index 58cd2f5bc11af..5a344c8ee37f9 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll @@ -9,26 +9,237 @@ define fastcc i32 @foo() { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; CHECK-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr102 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr229 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32 ; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17 - ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr40, 2, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 + ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc ; CHECK-NEXT: BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $scc, implicit-def $sgpr17 { ; CHECK-NEXT: $sgpr16_sgpr17 = S_GETPC_B64 ; CHECK-NEXT: $sgpr16 = S_ADD_U32 internal $sgpr16, target-flags(amdgpu-gotprel32-lo) @bar + 4, implicit-def $scc ; CHECK-NEXT: $sgpr17 = S_ADDC_U32 internal $sgpr17, target-flags(amdgpu-gotprel32-hi) @bar + 12, implicit-def $scc, implicit internal $scc ; CHECK-NEXT: } + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40 + ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable 
invariant load (s64) from got, addrspace 4) + ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; CHECK-NEXT: BUFFER_GL1_INV implicit $exec ; CHECK-NEXT: BUFFER_GL0_INV implicit $exec - ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40 ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40 - ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} @@ -46,6 +257,7 @@ define fastcc i32 @foo() { ; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 killed $sgpr4 ; CHECK-NEXT: S_WAITCNT 16240 ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit undef $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir index a244a433a4efb..55f21d95bcac4 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir @@ -280,8 +280,217 @@ body: | ; 
CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; CHECK-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr210 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr60 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40 ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index 5f0ca7bc42ae0..db80f5479d36b 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ 
b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -109,15 +109,15 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s5, s33 ; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0 +; MUBUF-NEXT: s_mov_b32 s6, s34 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000 +; MUBUF-NEXT: s_mov_b32 s34, s32 ; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33 ; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3 -; MUBUF-NEXT: s_mov_b32 s6, s34 ; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3 ; MUBUF-NEXT: v_mov_b32_e32 v3, 0 ; MUBUF-NEXT: v_mov_b32_e32 v4, 0x2000 ; MUBUF-NEXT: s_mov_b32 s4, 0 -; MUBUF-NEXT: s_mov_b32 s34, s32 ; MUBUF-NEXT: s_add_i32 s32, s32, 0x200000 ; MUBUF-NEXT: buffer_store_dword v3, v4, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) @@ -145,11 +145,11 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_mov_b32 s32, s34 ; MUBUF-NEXT: s_mov_b32 s34, s6 -; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6 ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc ; MUBUF-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; MUBUF-NEXT: s_waitcnt vmcnt(0) +; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: func_local_stack_offset_uses_sp: @@ -157,8 +157,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s2, s33 ; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff -; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_mov_b32 s3, s34 +; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_mov_b32 s34, s32 ; FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 @@ -186,11 +186,11 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s32, s34 ; 
FLATSCR-NEXT: s_mov_b32 s34, s3 -; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc ; FLATSCR-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: %pin.low = alloca i32, align 8192, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index ccaf0ac5377e4..94e997cf49ddb 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -18,8 +18,8 @@ define void @test_func_call_external_void_func_i32_imm() #0 { ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 @@ -52,8 +52,8 @@ define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: s_addk_i32 s32, 0x1400 ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: s_addk_i32 s32, 0x1400 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index e6243f0e41826..7155c8e085470 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll 
@@ -12,13 +12,224 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 2566 +; CHECK-NEXT: .cfi_undefined 2567 +; CHECK-NEXT: .cfi_undefined 2568 +; CHECK-NEXT: .cfi_undefined 2569 +; CHECK-NEXT: .cfi_undefined 2570 +; CHECK-NEXT: .cfi_undefined 2571 +; CHECK-NEXT: .cfi_undefined 2572 +; CHECK-NEXT: .cfi_undefined 2573 +; CHECK-NEXT: .cfi_undefined 2574 +; CHECK-NEXT: .cfi_undefined 2575 +; CHECK-NEXT: .cfi_undefined 2576 +; CHECK-NEXT: .cfi_undefined 2577 +; CHECK-NEXT: .cfi_undefined 2578 +; CHECK-NEXT: .cfi_undefined 2579 +; CHECK-NEXT: .cfi_undefined 2580 +; CHECK-NEXT: .cfi_undefined 2581 +; CHECK-NEXT: .cfi_undefined 2582 +; CHECK-NEXT: .cfi_undefined 2583 +; CHECK-NEXT: .cfi_undefined 2584 +; CHECK-NEXT: .cfi_undefined 2585 +; CHECK-NEXT: .cfi_undefined 2586 +; CHECK-NEXT: .cfi_undefined 2587 +; CHECK-NEXT: .cfi_undefined 2588 +; CHECK-NEXT: .cfi_undefined 2589 +; CHECK-NEXT: .cfi_undefined 2590 +; CHECK-NEXT: .cfi_undefined 2591 +; CHECK-NEXT: .cfi_undefined 2592 +; CHECK-NEXT: .cfi_undefined 2593 +; CHECK-NEXT: .cfi_undefined 2594 +; CHECK-NEXT: .cfi_undefined 2595 +; CHECK-NEXT: .cfi_undefined 2596 +; CHECK-NEXT: .cfi_undefined 2597 +; CHECK-NEXT: .cfi_undefined 2598 +; CHECK-NEXT: .cfi_undefined 2599 +; CHECK-NEXT: .cfi_undefined 2608 +; CHECK-NEXT: .cfi_undefined 2609 +; CHECK-NEXT: .cfi_undefined 2610 +; CHECK-NEXT: .cfi_undefined 2611 +; CHECK-NEXT: .cfi_undefined 2612 +; CHECK-NEXT: .cfi_undefined 2613 +; CHECK-NEXT: .cfi_undefined 2614 +; CHECK-NEXT: .cfi_undefined 2615 +; CHECK-NEXT: .cfi_undefined 2624 +; 
CHECK-NEXT: .cfi_undefined 2625 +; CHECK-NEXT: .cfi_undefined 2626 +; CHECK-NEXT: .cfi_undefined 2627 +; CHECK-NEXT: .cfi_undefined 2628 +; CHECK-NEXT: .cfi_undefined 2629 +; CHECK-NEXT: .cfi_undefined 2630 +; CHECK-NEXT: .cfi_undefined 2631 +; CHECK-NEXT: .cfi_undefined 2640 +; CHECK-NEXT: .cfi_undefined 2641 +; CHECK-NEXT: .cfi_undefined 2642 +; CHECK-NEXT: .cfi_undefined 2643 +; CHECK-NEXT: .cfi_undefined 2644 +; CHECK-NEXT: .cfi_undefined 2645 +; CHECK-NEXT: .cfi_undefined 2646 +; CHECK-NEXT: .cfi_undefined 2647 +; CHECK-NEXT: .cfi_undefined 2656 +; CHECK-NEXT: .cfi_undefined 2657 +; CHECK-NEXT: .cfi_undefined 2658 +; CHECK-NEXT: .cfi_undefined 2659 +; CHECK-NEXT: .cfi_undefined 2660 +; CHECK-NEXT: .cfi_undefined 2661 +; CHECK-NEXT: .cfi_undefined 2662 +; CHECK-NEXT: .cfi_undefined 2663 +; CHECK-NEXT: .cfi_undefined 2672 +; CHECK-NEXT: .cfi_undefined 2673 +; CHECK-NEXT: .cfi_undefined 2674 +; CHECK-NEXT: .cfi_undefined 2675 +; CHECK-NEXT: .cfi_undefined 2676 +; CHECK-NEXT: .cfi_undefined 2677 +; CHECK-NEXT: .cfi_undefined 2678 +; CHECK-NEXT: .cfi_undefined 2679 +; CHECK-NEXT: .cfi_undefined 2688 +; CHECK-NEXT: .cfi_undefined 2689 +; CHECK-NEXT: .cfi_undefined 2690 +; CHECK-NEXT: .cfi_undefined 2691 +; CHECK-NEXT: .cfi_undefined 2692 +; CHECK-NEXT: .cfi_undefined 2693 +; CHECK-NEXT: .cfi_undefined 2694 +; CHECK-NEXT: .cfi_undefined 2695 +; CHECK-NEXT: .cfi_undefined 2704 +; CHECK-NEXT: .cfi_undefined 2705 +; CHECK-NEXT: .cfi_undefined 2706 +; CHECK-NEXT: .cfi_undefined 2707 +; CHECK-NEXT: .cfi_undefined 2708 +; CHECK-NEXT: .cfi_undefined 2709 +; CHECK-NEXT: .cfi_undefined 2710 +; CHECK-NEXT: .cfi_undefined 2711 +; CHECK-NEXT: .cfi_undefined 2720 +; CHECK-NEXT: .cfi_undefined 2721 +; CHECK-NEXT: .cfi_undefined 2722 +; CHECK-NEXT: .cfi_undefined 2723 +; CHECK-NEXT: .cfi_undefined 2724 +; CHECK-NEXT: .cfi_undefined 2725 +; CHECK-NEXT: .cfi_undefined 2726 +; CHECK-NEXT: .cfi_undefined 2727 +; CHECK-NEXT: .cfi_undefined 2736 +; CHECK-NEXT: .cfi_undefined 2737 +; 
CHECK-NEXT: .cfi_undefined 2738 +; CHECK-NEXT: .cfi_undefined 2739 +; CHECK-NEXT: .cfi_undefined 2740 +; CHECK-NEXT: .cfi_undefined 2741 +; CHECK-NEXT: .cfi_undefined 2742 +; CHECK-NEXT: .cfi_undefined 2743 +; CHECK-NEXT: .cfi_undefined 2752 +; CHECK-NEXT: .cfi_undefined 2753 +; CHECK-NEXT: .cfi_undefined 2754 +; CHECK-NEXT: .cfi_undefined 2755 +; CHECK-NEXT: .cfi_undefined 2756 +; CHECK-NEXT: .cfi_undefined 2757 +; CHECK-NEXT: .cfi_undefined 2758 +; CHECK-NEXT: .cfi_undefined 2759 +; CHECK-NEXT: .cfi_undefined 2768 +; CHECK-NEXT: .cfi_undefined 2769 +; CHECK-NEXT: .cfi_undefined 2770 +; CHECK-NEXT: .cfi_undefined 2771 +; CHECK-NEXT: .cfi_undefined 2772 +; CHECK-NEXT: .cfi_undefined 2773 +; CHECK-NEXT: .cfi_undefined 2774 +; CHECK-NEXT: .cfi_undefined 2775 +; CHECK-NEXT: .cfi_undefined 2784 +; CHECK-NEXT: .cfi_undefined 2785 +; CHECK-NEXT: .cfi_undefined 2786 +; CHECK-NEXT: .cfi_undefined 2787 +; CHECK-NEXT: .cfi_undefined 2788 +; CHECK-NEXT: .cfi_undefined 2789 +; CHECK-NEXT: .cfi_undefined 2790 +; CHECK-NEXT: .cfi_undefined 2791 +; CHECK-NEXT: .cfi_undefined 2800 +; CHECK-NEXT: .cfi_undefined 2801 +; CHECK-NEXT: .cfi_undefined 2802 +; CHECK-NEXT: .cfi_undefined 2803 +; CHECK-NEXT: .cfi_undefined 2804 +; CHECK-NEXT: .cfi_undefined 2805 +; CHECK-NEXT: .cfi_undefined 2806 +; CHECK-NEXT: .cfi_undefined 2807 +; CHECK-NEXT: .cfi_undefined 32 +; CHECK-NEXT: .cfi_undefined 33 +; CHECK-NEXT: .cfi_undefined 34 +; CHECK-NEXT: .cfi_undefined 35 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 +; CHECK-NEXT: .cfi_undefined 40 +; CHECK-NEXT: .cfi_undefined 41 +; CHECK-NEXT: .cfi_undefined 42 +; CHECK-NEXT: .cfi_undefined 43 +; CHECK-NEXT: .cfi_undefined 44 +; CHECK-NEXT: .cfi_undefined 45 +; CHECK-NEXT: .cfi_undefined 46 +; CHECK-NEXT: .cfi_undefined 47 +; CHECK-NEXT: .cfi_undefined 48 +; CHECK-NEXT: .cfi_undefined 49 +; CHECK-NEXT: .cfi_undefined 50 +; CHECK-NEXT: .cfi_undefined 51 +; 
CHECK-NEXT: .cfi_undefined 52 +; CHECK-NEXT: .cfi_undefined 53 +; CHECK-NEXT: .cfi_undefined 54 +; CHECK-NEXT: .cfi_undefined 55 +; CHECK-NEXT: .cfi_undefined 56 +; CHECK-NEXT: .cfi_undefined 57 +; CHECK-NEXT: .cfi_undefined 58 +; CHECK-NEXT: .cfi_undefined 59 +; CHECK-NEXT: .cfi_undefined 60 +; CHECK-NEXT: .cfi_undefined 61 +; CHECK-NEXT: .cfi_undefined 72 +; CHECK-NEXT: .cfi_undefined 73 +; CHECK-NEXT: .cfi_undefined 74 +; CHECK-NEXT: .cfi_undefined 75 +; CHECK-NEXT: .cfi_undefined 76 +; CHECK-NEXT: .cfi_undefined 77 +; CHECK-NEXT: .cfi_undefined 78 +; CHECK-NEXT: .cfi_undefined 79 +; CHECK-NEXT: .cfi_undefined 88 +; CHECK-NEXT: .cfi_undefined 89 +; CHECK-NEXT: .cfi_undefined 90 +; CHECK-NEXT: .cfi_undefined 91 +; CHECK-NEXT: .cfi_undefined 92 +; CHECK-NEXT: .cfi_undefined 93 +; CHECK-NEXT: .cfi_undefined 94 +; CHECK-NEXT: .cfi_undefined 95 +; CHECK-NEXT: .cfi_undefined 1096 +; CHECK-NEXT: .cfi_undefined 1097 +; CHECK-NEXT: .cfi_undefined 1098 +; CHECK-NEXT: .cfi_undefined 1099 +; CHECK-NEXT: .cfi_undefined 1100 +; CHECK-NEXT: .cfi_undefined 1101 +; CHECK-NEXT: .cfi_undefined 1102 +; CHECK-NEXT: .cfi_undefined 1103 +; CHECK-NEXT: .cfi_undefined 1112 +; CHECK-NEXT: .cfi_undefined 1113 +; CHECK-NEXT: .cfi_undefined 1114 +; CHECK-NEXT: .cfi_undefined 1115 +; CHECK-NEXT: .cfi_undefined 1116 +; CHECK-NEXT: .cfi_undefined 1117 +; CHECK-NEXT: .cfi_undefined 1118 +; CHECK-NEXT: .cfi_undefined 1119 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2600, 0 ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 @@ 
-43,6 +254,7 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index 627f4ada95dba..bac460949d579 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -219,8 +219,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s7, s33 -; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_mov_b32 s33, s32 +; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: s_cbranch_execz .LBB2_3 @@ -254,8 +254,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s3, s33 -; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_mov_b32 s33, s32 +; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz .LBB2_3 @@ -317,9 +317,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; MUBUF-NEXT: s_mov_b32 s7, s33 ; MUBUF-NEXT: s_add_i32 s33, s32, 0xfc0 ; MUBUF-NEXT: s_mov_b32 s8, s34 -; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000 ; MUBUF-NEXT: s_mov_b32 s34, s32 +; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_addk_i32 s32, 0x2000 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: 
s_cbranch_execz .LBB3_2 @@ -354,9 +354,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; FLATSCR-NEXT: s_mov_b32 s3, s33 ; FLATSCR-NEXT: s_add_i32 s33, s32, 63 ; FLATSCR-NEXT: s_mov_b32 s4, s34 -; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63 ; FLATSCR-NEXT: s_mov_b32 s34, s32 +; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_addk_i32 s32, 0x80 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz .LBB3_2 diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir index bb248fe0444db..c61241c65b326 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -36,6 +36,114 @@ body: | ; GCN-LABEL: name: preserve_active_lanes_above_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10 @@ -69,8 +177,125 @@ body: | ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr10, 0 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 @@ -112,6 +337,122 @@ body: | ; GCN-LABEL: name: dont_preserve_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc @@ -143,8 +484,125 @@ body: | ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr9, 0 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 @@ -186,6 +644,17 @@ body: | ; GCN-LABEL: name: dont_preserve_if_no_chain_calls ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, 
implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 @@ -222,6 +691,116 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 
+ ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 @@ -260,6 +839,114 @@ body: | ; GCN-LABEL: name: dont_preserve_sgpr ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, 
implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir index 4aea915936ffc..b4f4412373509 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir @@ -37,9 +37,127 @@ body: | ; GCN-LABEL: name: preserve_inactive_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr8, 0 ; 
GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr9, 128 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) @@ -73,6 +191,18 @@ body: | ; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls ; GCN: liveins: $sgpr35, $vgpr8 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 @@ -106,6 +236,114 @@ body: | ; GCN-LABEL: name: dont_preserve_wwm_if_init_whole_wave ; GCN: liveins: $sgpr0, $sgpr35 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1 @@ -131,6 +369,116 @@ body: | ; GCN-LABEL: name: dont_preserve_non_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc @@ -162,6 
+510,118 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 @@ -200,6 +660,114 @@ body: | ; GCN-LABEL: name: dont_preserve_sgpr ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = 
SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir index 4b4e9f1d81ec6..fa52c2f2bba71 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir @@ -20,7 +20,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v1 - ; MUBUF: $vgpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-NEXT: S_ENDPGM 0 @@ -28,13 +31,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v1 ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v1 - ; FLATSCR: $vgpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -42,13 +52,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v1 - ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -56,13 +73,20 @@ 
body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v1 - ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -70,6 +94,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec @@ -93,7 +121,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v2 - ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -103,6 +135,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, 
implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -111,7 +149,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v2 - ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -119,6 +161,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -127,7 +175,11 
@@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v2 - ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -137,6 +189,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -145,7 +203,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v2 - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -153,6 +215,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit 
killed $vgpr0_vgpr1 @@ -178,7 +246,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v3 - ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -190,6 +263,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, 
implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -200,7 +281,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v3 - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -208,6 +294,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -218,7 +312,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v3 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -230,6 +329,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -240,7 +347,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v3 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -248,6 +360,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -275,7 +395,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v4 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -289,6 +415,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v4 ; 
MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -301,7 +437,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v4 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -309,6 +451,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -321,7 +473,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v4 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr3 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -335,6 +493,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -347,7 +515,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: 
name: test_spill_av_v4 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -355,6 +529,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -384,7 +568,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v5 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -400,6 +591,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -414,7 +617,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v5 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, 
addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -424,6 +634,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -438,7 +660,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v5 - ; MUBUF-GFX90A: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -454,6 +683,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -468,7 +709,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v5 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; 
FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -478,6 +726,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -509,7 +769,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v6 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 
6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -527,6 +795,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -543,7 +825,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v6 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 
0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -553,6 +843,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -569,7 +873,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v6 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -587,6 +899,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 
+ ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -603,7 +929,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v6 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; 
FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -613,6 +947,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -646,7 +994,16 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v7 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -666,6 +1023,22 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v7 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -684,7 +1057,16 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v7 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -694,6 +1076,22 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -712,7 +1110,16 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v7 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 
implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -732,6 +1139,22 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v7 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit 
$exec @@ -750,7 +1173,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v7 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -760,6 +1192,22 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -795,7 +1243,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v8 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -817,6 +1275,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -837,7 +1313,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v8 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: 
SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -847,6 +1333,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -867,7 +1371,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v8 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; 
MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -889,6 +1403,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; 
MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -909,7 +1441,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v8 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -919,6 +1461,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit 
$exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -956,7 +1516,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -994,6 +1572,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 
+ ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1030,7 +1642,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into 
%stack.0 + 32, align 4, addrspace 5) @@ -1044,6 +1674,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1080,7 +1744,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1118,6 +1800,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; 
MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1154,7 +1870,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1168,6 +1902,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 
0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1221,7 +1989,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v32 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1291,6 +2093,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr31 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1359,7 +2227,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v32 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1381,6 +2283,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1449,7 +2417,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v32 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1519,6 +2521,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1587,7 +2655,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v32 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1609,6 +2711,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 
+ ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1694,7 +2862,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a1 - ; MUBUF: $agpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) @@ -1704,13 +2875,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a1 ; MUBUF-V2A: liveins: $vgpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a1 - ; FLATSCR: $agpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, 
implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) @@ -1720,13 +2898,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-V2A: liveins: $vgpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a1 - ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -1734,13 +2919,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a1 - ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1748,6 +2940,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec @@ -1771,7 +2967,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a2 - ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1785,6 +2985,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a2 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1793,7 +2999,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a2 - ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1807,6 +3017,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1815,7 +3031,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a2 - ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -1825,6 +3045,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1833,7 +3059,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a2 - ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1841,6 +3071,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 @@ -1866,7 +3102,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a3 - ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr2 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1884,6 +3125,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a3 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1894,7 +3143,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a3 - ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + 
; FLATSCR-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1912,6 +3166,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1922,7 +3184,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a3 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1934,6 +3201,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1944,7 +3219,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a3 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1952,6 +3232,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed 
$agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 @@ -1979,7 +3267,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a4 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2001,6 +3295,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a4 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = 
IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2013,7 +3317,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a4 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2035,6 +3345,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2047,7 +3367,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a4 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2061,6 +3387,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + 
; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2073,7 +3409,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a4 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, 
align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -2081,6 +3423,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -2110,7 +3462,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a5 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2136,6 +3495,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a5 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: $vgpr3 = 
V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2150,7 +3521,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a5 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2176,6 +3554,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2190,7 +3580,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a5 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2206,6 +3603,18 @@ 
body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2220,7 +3629,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a5 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2230,6 +3646,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -2261,7 +3689,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a6 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2291,6 +3727,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a6 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2307,7 +3757,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a6 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2337,6 +3795,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: $vgpr4 = 
V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2353,7 +3825,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a6 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2371,6 +3851,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2387,7 +3881,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a6 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = 
IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2397,6 +3899,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -2430,7 +3946,16 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a7 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2464,6 +3989,22 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a7 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; MUBUF-V2A-NEXT: {{ $}} + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2482,7 +4023,16 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a7 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2516,6 +4066,22 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2534,7 +4100,16 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a7 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 
implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2554,6 +4129,22 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a7 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2572,7 +4163,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a7 - ; FLATSCR-GFX90A: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2582,6 +4182,22 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 @@ -2617,7 +4233,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a8 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2655,6 +4281,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a8 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2675,7 +4319,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a8 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, 
implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2713,6 +4367,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: 
$vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2733,7 +4405,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a8 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2755,6 +4437,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2775,7 +4475,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a8 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2785,6 +4495,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -2822,7 +4550,18 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a9 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2864,6 +4603,26 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a9 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2886,7 +4645,18 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a9 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2928,6 +4698,26 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a9 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2950,7 +4740,18 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a9 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: 
(store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2974,6 +4775,26 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a9 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2996,7 +4817,18 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a9 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: 
SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5) @@ -3008,6 +4840,26 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a9 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 @@ -3047,7 +4899,19 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a10 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3093,6 +4957,28 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a10 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; MUBUF-V2A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3117,7 +5003,19 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a10 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = 
V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3163,6 +5061,28 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a10 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3187,7 +5107,19 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a10 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit 
$exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3213,6 +5145,28 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a10 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = 
IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3237,7 +5191,19 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a10 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 
16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s64) into %stack.0 + 32, align 4, addrspace 5) @@ -3249,6 +5215,28 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a10 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 @@ -3290,7 +5278,20 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a11 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 
killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3340,6 +5341,30 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a11 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 
+ ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3366,7 +5391,20 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a11 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = 
V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3416,6 +5454,30 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a11 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3442,7 +5504,20 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a11 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr10 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3470,6 +5545,30 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a11 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + 
; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3496,7 +5595,20 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a11 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s96) into %stack.0 + 32, align 4, addrspace 5) @@ -3508,6 +5620,30 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a11 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 
+ ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 @@ -3551,7 +5687,21 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a12 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3605,6 +5755,32 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a12 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3633,7 +5809,21 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a12 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 
:: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3687,6 +5877,32 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a12 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3715,7 +5931,21 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a12 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; 
MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3745,6 +5975,32 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a12 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3773,7 +6029,21 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a12 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -3785,6 +6055,32 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a12 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; 
FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 @@ -3830,7 +6126,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 
killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3900,6 +6214,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a16 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3936,7 +6284,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4006,6 +6372,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a16 ; 
FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4042,7 +6442,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -4080,6 +6498,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4116,7 +6568,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -4130,6 +6600,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -4183,7 +6687,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a32 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4317,6 +6855,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a32 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 
+ ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed 
$agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4385,7 +6989,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a32 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4519,6 +7157,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4587,7 +7291,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a32 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -4657,6 +7395,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4725,7 +7529,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a32 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -4747,6 +7585,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir index 8eddc9a5afd50..603aa92f1b27a 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir @@ -59,6 +59,10 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v2_partial_agpr ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -69,6 +73,10 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) @@ -96,6 +104,11 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v3_partial_agpr ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; 
MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -108,6 +121,11 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -135,6 +153,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v4_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 
= IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -149,6 +173,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -180,6 +210,13 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v5_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 ; MUBUF-V2A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -196,6 +233,13 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -227,6 +271,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v6_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -245,6 +297,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -280,6 +340,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v8_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, 
$agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -302,6 +372,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -335,6 +415,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v16_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -373,6 +471,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir index 2fbe08300af57..94518c6ae455f 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir @@ -20,7 +20,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v1 - ; MUBUF: $vgpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, 
addrspace 5) ; MUBUF-NEXT: S_ENDPGM 0 @@ -28,13 +31,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v1 ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v1 - ; FLATSCR: $vgpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -42,13 +52,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v1 ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v1 - ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -56,13 +73,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v1 - ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -70,6 +94,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec @@ -93,7 +121,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v2 - ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, 
addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -103,6 +135,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -111,7 +149,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v2 - ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -119,6 +161,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v2 ; 
FLATSCR-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -127,7 +175,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v2 - ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -137,6 +189,12 @@ body: | 
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -145,7 +203,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v2 - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -153,6 +215,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 ; 
FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -178,7 +246,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v3 - ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, 
addrspace 5) @@ -190,6 +263,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -200,7 +281,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v3 - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -208,6 +294,14 @@ body: | ; 
FLATSCR-V2A-LABEL: name: test_spill_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -218,7 +312,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v3 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec 
:: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -230,6 +329,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -240,7 +347,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v3 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; 
FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -248,6 +360,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -275,7 +395,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v4 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -289,6 +415,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v4 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -301,7 +437,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v4 - ; FLATSCR: 
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -309,6 +451,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, 
implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -321,7 +473,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v4 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -335,6 +493,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -347,7 +515,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v4 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -355,6 +529,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; 
FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -384,7 +568,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v5 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -400,6 +591,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -414,7 +617,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v5 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 
0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -424,6 +634,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -438,7 +660,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v5 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -454,6 
+683,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -468,7 +709,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v5 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -478,6 +726,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -509,7 +769,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v6 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -527,6 +795,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-V2A-NEXT: 
{{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -543,7 +825,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v6 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -553,6 +843,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -569,7 +873,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v6 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -587,6 +899,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, 
$agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -603,7 +929,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v6 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -613,6 +947,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -646,7 +994,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v8 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -668,6 +1026,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -688,7 +1064,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v8 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -698,6 +1084,24 @@ body: | ; 
FLATSCR-V2A-LABEL: name: test_spill_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -718,7 +1122,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v8 - ; 
MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -740,6 +1154,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -760,7 +1192,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v8 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -770,6 +1212,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -807,7 +1267,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -845,6 +1323,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -881,7 +1393,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -895,6 +1425,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, 
implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -931,7 +1495,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -969,6 +1551,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1005,7 +1621,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed 
$vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1019,6 +1653,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1072,7 +1740,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v32 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1142,6 +1844,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1210,7 +1978,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v32 - ; FLATSCR: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1232,6 +2034,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, 
$agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1300,7 +2168,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v32 - ; MUBUF-GFX90A: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1370,6 +2272,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, 
$agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ 
-1438,7 +2406,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v32 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + 
; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed 
$vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1460,6 +2462,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1545,7 +2613,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a1 - ; MUBUF: $agpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) @@ -1555,13 +2626,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a1 ; MUBUF-V2A: liveins: $vgpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: 
$agpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a1 - ; FLATSCR: $agpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) @@ -1571,13 +2649,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a1 ; FLATSCR-V2A: liveins: $vgpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a1 - ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -1585,13 +2670,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a1 - ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1599,6 +2691,10 @@ body: | ; 
FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec @@ -1622,7 +2718,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a2 - ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1636,6 +2736,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a2 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1644,7 +2750,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a2 - ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1658,6 +2768,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1666,7 +2782,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a2 - ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -1676,6 +2796,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1684,7 +2810,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a2 - ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1692,6 +2822,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 @@ -1717,7 +2853,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a3 - ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1735,6 +2876,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a3 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1745,7 +2894,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a3 - ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1763,6 +2917,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1773,7 +2935,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a3 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1785,6 +2952,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1795,7 +2970,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a3 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1803,6 +2983,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 @@ -1830,7 +3018,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a4 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1852,6 +3046,16 @@ body: | ; MUBUF-V2A-LABEL: 
name: test_spill_a4 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1864,7 +3068,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a4 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit 
$exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1886,6 +3096,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1898,7 +3118,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a4 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr3 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1912,6 +3138,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1924,7 +3160,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; 
FLATSCR-GFX90A-LABEL: name: test_spill_a4 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1932,6 +3174,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -1961,7 +3213,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a5 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1987,6 +3246,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a5 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2001,7 +3272,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a5 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: 
$vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2027,6 +3305,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2041,7 +3331,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a5 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2057,6 +3354,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = 
V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2071,7 +3380,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a5 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2081,6 +3397,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -2112,7 +3440,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a6 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr5 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2142,6 +3478,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a6 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ 
-2158,7 +3508,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a6 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2188,6 +3546,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2204,7 +3576,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a6 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2222,6 +3602,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2238,7 +3632,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a6 - ; 
FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2248,6 +3650,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -2281,7 +3697,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a8 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2319,6 +3745,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a8 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2339,7 +3783,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a8 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2377,6 +3831,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a8 ; 
FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2397,7 +3869,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a8 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2419,6 +3901,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2439,7 +3939,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a8 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2449,6 +3959,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -2486,7 +4014,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2556,6 +4102,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a16 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2592,7 +4172,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2662,6 +4260,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a16 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec 
@@ -2698,7 +4330,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2736,6 +4386,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2772,7 +4456,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a16 - ; FLATSCR-GFX90A: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into 
%stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -2786,6 +4488,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -2839,7 +4575,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a32 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2973,6 +4743,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a32 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3041,7 +4877,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a32 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3175,6 +5045,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 
+ ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3243,7 +5179,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a32 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-GFX90A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3313,6 +5283,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3381,7 +5417,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a32 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -3403,6 +5473,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 
0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 05cbd4c2a010d..71e7ca11a86cd 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -29,11 +29,43 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + 
; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -45,6 +77,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit 
$sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -77,11 +110,42 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr ; CHECK: liveins: $sgpr29, $sgpr40, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr29 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr40 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, 
implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -93,6 +157,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr40 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, 
implicit-def $sgpr31, implicit-def $vcc @@ -125,11 +190,41 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 ; CHECK: liveins: $sgpr28, $sgpr29, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 ; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
register $sgpr33, $sgpr28 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr29 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -141,6 +236,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0, 
implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -172,11 +268,41 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc ; CHECK: liveins: $sgpr28, $sgpr29, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 ; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr28 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr29 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -188,6 +314,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, 
implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0 S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index 4f1c9a20fddc3..7c4e03fd0e6df 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -25,11 +25,43 @@ body: | ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs ; MUBUF: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, 
implicit-def dead $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -39,17 +71,50 @@ body: | ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc ; ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs ; FLATSCR: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; FLATSCR-NEXT: $sgpr41 = 
frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc @@ -58,6 +123,7 @@ body: | ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc 
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index 480859a09a347..cd335321e2156 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -24,11 +24,43 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, 
implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -37,6 +69,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index 63a4759d8e740..fb3e8116d86a4 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir 
@@ -26,16 +26,85 @@ body: | ; GFX8-LABEL: name: pei_scavenge_vgpr_spill ; GFX8: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX8-NEXT: $sgpr4 = COPY $sgpr33 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192 @@ -51,22 +120,92 @@ body: | ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = COPY $sgpr4 ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; ; GFX9-LABEL: name: pei_scavenge_vgpr_spill ; GFX9: liveins: $vgpr2, 
$vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-NEXT: {{ $}} + ; 
GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; 
GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr95 ; GFX9-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX9-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec @@ -80,22 +219,92 @@ body: | ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX9-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 
0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX9-FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec @@ 
-108,6 +317,7 @@ body: | ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX9-FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir index bfca9331a5d25..023bcc563cdcd 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,+wavefrontsize32,-wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W32 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,-wavefrontsize32,+wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W64 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,+wavefrontsize32,-wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=W32 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,-wavefrontsize32,+wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=W64 --- | define void @one_block() { ret void } @@ -23,15 +23,61 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; 
CHECK-LABEL: name: one_block - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: one_block + ; W32: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 9 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W32-NEXT: $m0 = S_MOV_B32 9 + ; W32-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit 
$vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: one_block + ; W64: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 9 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W64-NEXT: $m0 = S_MOV_B32 9 + ; W64-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 S_SETPC_B64_return $sgpr30_sgpr31 ... @@ -47,15 +93,61 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: one_block_csr_only - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def 
$vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: one_block_csr_only + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; 
W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 16711935 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 + ; W32-NEXT: $m0 = S_MOV_B32 16711935 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: one_block_csr_only + ; W64: liveins: 
$sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 16711935 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, 
implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 + ; W64-NEXT: $m0 = S_MOV_B32 16711935 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -75,23 +167,125 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: multiple_blocks - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = 
S_MOV_B32 3 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: multiple_blocks + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; 
W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 65 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, 
implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 65 + ; W32-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: multiple_blocks + ; W64: liveins: $sgpr30_sgpr31, 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; 
W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 65 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 65 + ; W64-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = 
SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -109,19 +303,101 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: reg_tuples - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit 
$vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: reg_tuples + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; W32-NEXT: $m0 = S_MOV_B32 7 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit 
$m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 7 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: reg_tuples + ; W64: 
liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr85 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; W64-NEXT: $m0 = S_MOV_B32 7 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, 
implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 7 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -145,17 +421,65 @@ stack: body: | bb.0: liveins: $sgpr30_sgpr31, $vgpr48 - ; CHECK-LABEL: name: locals - ; CHECK: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40 - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: locals + ; W32: liveins: $vgpr48, $sgpr30_sgpr31, 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; W32-NEXT: 
SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: locals + ; W64: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.0, addrspace 5) SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) S_NOP 0, implicit-def $vgpr40 @@ -182,10 +506,32 @@ body: | ; W32-LABEL: name: other_regs ; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr22 ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 512 ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 640 ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr44, 768 ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; W32-NEXT: $m0 = S_MOV_B32 9 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) @@ -205,10 +551,32 @@ body: | ; W64-LABEL: name: other_regs ; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; 
W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 1024 ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 1280 ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr44, 1536 ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; W64-NEXT: $m0 = S_MOV_B32 9 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) @@ -240,11 +608,27 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: entry_func - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: entry_func + ; W32: liveins: $sgpr30_sgpr31 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: entry_func + ; W64: liveins: $sgpr30_sgpr31 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -255,29 +639,89 @@ tracksRegLiveness: true machineFunctionInfo: stackPtrOffsetReg: $sgpr32 body: | - ; CHECK-LABEL: name: multiple_basic_blocks - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit 
$vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: multiple_basic_blocks + ; W32: bb.0: + ; W32-NEXT: successors: %bb.1(0x80000000) + ; W32-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 11 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W32-NEXT: S_BRANCH %bb.1 + ; W32-NEXT: {{ $}} + ; W32-NEXT: bb.1: + ; W32-NEXT: successors: %bb.2(0x80000000) + ; W32-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 + ; W32-NEXT: {{ $}} + ; W32-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 + ; W32-NEXT: S_BRANCH %bb.2 + ; W32-NEXT: {{ $}} + ; W32-NEXT: bb.2: + ; W32-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: $m0 = S_MOV_B32 11 + ; W32-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: multiple_basic_blocks + ; W64: bb.0: + ; W64-NEXT: successors: %bb.1(0x80000000) + ; W64-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 11 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W64-NEXT: S_BRANCH %bb.1 + ; W64-NEXT: {{ $}} + ; W64-NEXT: bb.1: + ; W64-NEXT: successors: 
%bb.2(0x80000000) + ; W64-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 + ; W64-NEXT: {{ $}} + ; W64-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 + ; W64-NEXT: S_BRANCH %bb.2 + ; W64-NEXT: {{ $}} + ; W64-NEXT: bb.2: + ; W64-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: $m0 = S_MOV_B32 11 + ; W64-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 bb.0: liveins: $sgpr30_sgpr31, $vgpr44 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir index 168d63d3a95b9..37c8788d8d691 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir +++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir @@ -20,6 +20,9 @@ body: | ; GCN-LABEL: name: preserve_scratch_vgpr_inactive_lanes ; GCN: liveins: $sgpr35, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = 
SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll index f4a9e7e8f2759..29e34a0454d16 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll @@ -17,6 +17,9 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: s_mov_b64 exec, -1 ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, s[18:19] +; GFX906-NEXT: v_writelane_b32 v41, s16, 4 +; GFX906-NEXT: v_writelane_b32 v41, s34, 2 +; GFX906-NEXT: v_writelane_b32 v41, s35, 3 ; GFX906-NEXT: s_mov_b32 s21, s15 ; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX906-NEXT: s_mov_b32 s22, s14 @@ -30,11 +33,8 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: v_writelane_b32 v39, s26, 4 ; GFX906-NEXT: v_writelane_b32 v39, s27, 5 ; GFX906-NEXT: v_writelane_b32 v39, s8, 6 -; GFX906-NEXT: v_writelane_b32 v41, s16, 4 ; GFX906-NEXT: v_writelane_b32 v39, s9, 7 -; GFX906-NEXT: v_writelane_b32 v41, s34, 2 ; GFX906-NEXT: v_writelane_b32 v39, s6, 8 -; GFX906-NEXT: v_writelane_b32 v41, s35, 3 ; GFX906-NEXT: v_writelane_b32 v39, s7, 9 ; GFX906-NEXT: v_writelane_b32 v41, s30, 0 ; GFX906-NEXT: v_writelane_b32 v39, s4, 10 diff --git a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll index bf417b211826a..ba460fc7b4266 100644 --- a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll +++ b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll @@ -14,6 +14,8 @@ define hidden void @_Z9base_casev() #0 !dbg !6 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: 
.cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 7 3 prologue_end ; file.cpp:7:3 diff --git a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll index e29f09dcac024..072f679390e3c 100644 --- a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll @@ -14,6 +14,9 @@ define hidden void @ptr_arg_split_subregs(ptr %arg1) #0 !dbg !9 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2562 ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_subregs:a <- [DW_OP_LLVM_fragment 32 32] [$vgpr1+0] ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_subregs:a <- [DW_OP_LLVM_fragment 0 32] [$vgpr0+0] ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -43,6 +46,10 @@ define hidden void @ptr_arg_split_reg_mem(<30 x i32>, ptr %arg2) #0 !dbg !25 { ; CHECK-NEXT: .loc 1 10 0 ; example.cpp:10:0 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2591 ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_reg_mem:b <- [$vgpr30+0] ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 @@ -69,6 +76,11 @@ define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) #0 !dbg !31 { ; CHECK-NEXT: .loc 1 15 0 ; example.cpp:15:0 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 ; CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir index 592e0f0cf0c24..9b226df530eec 100644 --- a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir @@ -15,6 +15,12 @@ body: | ; CHECK-LABEL: name: same_slot_agpr_sgpr ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF @@ -48,6 +54,12 @@ body: | ; CHECK-LABEL: name: diff_slot_agpr_sgpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF @@ -79,6 +91,10 @@ body: | ; CHECK-LABEL: name: dead_vgpr_slot ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index 925984b15367d..9e61fa0e681cc 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -30,15 +30,65 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GCN-NEXT: $vcc_hi = frame-setup COPY $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr1, 0 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 128 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr3, 256 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr4, 384 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr5, 512 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2 @@ -206,6 +256,7 @@ body: | ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit 
$flat_scr :: (load (s32) from %stack.73, addrspace 5) ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi ; GCN-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir index 59c4b715dd12e..09e25075e51c5 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -23,6 +23,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -73,6 +75,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 @@ -122,6 +126,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; 
VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -170,6 +177,10 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 @@ -220,6 +231,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -273,6 +348,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -329,6 +468,70 @@ body: | 
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3 @@ -383,6 +586,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3 @@ -443,6 +710,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -507,6 +838,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir index cac9c85130a7b..a1fc683679f9d 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -25,6 +25,9 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll index 761ff7786b98e..3419cb3d76320 100644 --- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll +++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll @@ -9,6 +9,15 @@ define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_ev ; GCN-NEXT: .cfi_sections .debug_frame ; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: ; %bb +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 2560 +; GCN-NEXT: .cfi_undefined 2561 +; GCN-NEXT: .cfi_undefined 2562 +; GCN-NEXT: .cfi_undefined 2563 +; GCN-NEXT: .cfi_undefined 2564 +; GCN-NEXT: .cfi_undefined 36 +; GCN-NEXT: .cfi_undefined 37 ; GCN-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 00214ef36e1f0..3c3a2f11fc96a 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -231,8 +231,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_getpc_b64 s[4:5] @@ -382,8 +382,8 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12 @@ -450,8 +450,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v42, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir 
b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir index 9d25df4738709..cfa0ee97e83d0 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -17,6 +17,8 @@ body: | ; CHECK-LABEL: name: spill_a64_kill ; CHECK: liveins: $agpr0_agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -42,6 +44,8 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub1_killed ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -65,6 +69,8 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub0_killed ; CHECK: liveins: $agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, 
implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -84,7 +90,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_a32_undef - ; CHECK: S_ENDPGM 0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_ENDPGM 0 SI_SPILL_A32_SAVE undef $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -101,7 +109,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_a64_undef - ; CHECK: S_ENDPGM 0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_ENDPGM 0 SI_SPILL_A64_SAVE undef $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index 3f6956b83ae92..d4241fb0c53f1 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -38,6 +38,12 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 @@ -82,6 +88,12 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 @@ -141,6 +153,12 @@ body: | ; GFX908-EXPANDED-NEXT: successors: 
%bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1 ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -181,6 +199,12 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1 ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -253,6 +277,9 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-EXPANDED-NEXT: {{ 
$}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX908-EXPANDED-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX908-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) @@ -319,6 +346,9 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX90A-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc @@ -402,6 +432,14 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -444,6 +482,14 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -503,6 +549,16 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -547,6 +603,16 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -608,6 +674,18 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -654,6 +732,18 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -717,6 +807,20 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -765,6 +869,20 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; 
GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -830,6 +948,24 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -882,6 +1018,24 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -951,6 +1105,26 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; 
GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1005,6 +1179,26 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; 
GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1076,6 +1270,28 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1132,6 +1348,28 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1205,6 +1443,30 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1263,6 +1525,30 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1338,6 +1624,32 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; 
GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1398,6 +1710,32 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1475,6 +1813,40 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX908-EXPANDED-NEXT: S_NOP 0, 
implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1543,6 +1915,40 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1628,6 +2034,72 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1728,6 
+2200,72 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; 
GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index f4edafd9443ab..be5295cf2affd 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -22,8 +22,17 @@ body: | ; GCN-LABEL: name: spill_sgpr128_use_subreg ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF @@ -60,8 +69,16 @@ body: | ; GCN-LABEL: name: spill_sgpr128_use_kill ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF @@ -95,6 +112,10 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_subreg ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -125,6 +146,9 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_kill ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir index 6e8a5126ca823..cfa09c149e4c6 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir @@ -21,6 +21,12 @@ body: | ; GCN-LABEL: name: shift_back_exec_copy_reserved_reg ; GCN: liveins: $sgpr30_sgpr31, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -58,6 +64,14 @@ body: | ; GCN-LABEL: name: spill_exec_copy_reserved_reg ; GCN: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, 
$sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 ; GCN-NEXT: $sgpr40_sgpr41 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir index 52593e01eafde..da80320bc1af1 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir @@ -16,6 +16,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr31 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 @@ -41,6 +47,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -68,6 +80,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_3_of_4 ; GCN: liveins: $agpr28, $agpr29, $agpr30, $agpr31, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -97,6 +115,16 @@ body: | ; GCN-LABEL: name: full_spill_v128 ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 
killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -126,6 +154,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_1_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 @@ -151,6 +185,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_2_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -178,6 +218,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -207,6 +253,16 @@ body: | ; GCN-LABEL: name: full_spill_a128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir index 0c694d9f49e18..79a95cbf52391 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir +++ b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir @@ -16,6 +16,9 @@ body: | ; EXPANDED: bb.0: ; 
EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 @@ -62,6 +65,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 @@ -110,6 +116,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, 
implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.mir b/llvm/test/CodeGen/AMDGPU/spillv16.mir index 05569bf394c43..7be0bfa3e3fc8 100644 --- a/llvm/test/CodeGen/AMDGPU/spillv16.mir +++ b/llvm/test/CodeGen/AMDGPU/spillv16.mir @@ -32,6 +32,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, addrspace 5) ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 109c7d638f924..dabdc95b73fa5 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -13,6 +13,8 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 
63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp0: ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5 @@ -35,6 +37,12 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 2560 +; GCN-NEXT: .cfi_undefined 2561 +; GCN-NEXT: .cfi_undefined 2562 +; GCN-NEXT: .cfi_undefined 2563 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp2: ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17 @@ -65,6 +73,8 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp8: ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5 @@ -83,6 +93,8 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0 ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 
64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp10: ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5 @@ -103,6 +115,8 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp12: ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5 @@ -121,6 +135,8 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg ! 
; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp14: ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5 @@ -139,6 +155,8 @@ define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp16: ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 9cb22dad86b88..cf827945fb5f7 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -32,7 +32,6 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 { ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 144 %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 @@ -46,6 +45,8 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x3c0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffffc00 +; GCN-NEXT: s_mov_b32 s5, 
s34 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -58,8 +59,6 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v1, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 3 -; GCN-NEXT: s_mov_b32 s5, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x2800 ; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -71,7 +70,6 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 160 %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 @@ -86,6 +84,8 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x7c0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff800 +; GCN-NEXT: s_mov_b32 s5, s34 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -98,8 +98,6 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v1, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 3 -; GCN-NEXT: s_mov_b32 s5, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x3000 ; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -111,7 +109,6 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 192 %alloca.align16 = alloca [8 x <4 x i32>], align 32, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> 
, ptr addrspace(5) %gep0, align 32 @@ -125,10 +122,10 @@ define void @force_realign4(i32 %idx) #1 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffff00 -; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GCN-NEXT: s_mov_b32 s5, s34 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GCN-NEXT: s_addk_i32 s32, 0xd00 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_mov_b32_e32 v1, 3 @@ -138,7 +135,6 @@ define void @force_realign4(i32 %idx) #1 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 52 %alloca.align16 = alloca [8 x i32], align 4, addrspace(5) %gep0 = getelementptr inbounds [8 x i32], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile i32 3, ptr addrspace(5) %gep0, align 4 @@ -295,9 +291,9 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:1028 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: v_mov_b32_e32 v32, 0 ; GCN-NEXT: v_writelane_b32 v40, s34, 3 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_mov_b32_e32 v32, 0 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:1024 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s34 @@ -346,8 +342,8 @@ define i32 @needs_align1024_stack_args_used_inside_loop(ptr addrspace(5) nocaptu ; GCN-NEXT: s_mov_b32 s11, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xffc0 ; GCN-NEXT: s_mov_b32 s14, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s34 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s10, 0 @@ -416,12 +412,12 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 { ; 
GCN-LABEL: no_free_scratch_sgpr_for_bp_copy: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s41, s34 -; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_mov_b32 s40, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GCN-NEXT: s_mov_b32 s41, s34 +; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: s_mov_b32 s34, s41 diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll index ebd4bc881f2af..6be2c490e3ea8 100644 --- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll +++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll @@ -184,8 +184,8 @@ define void @outgoing_f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] -; GFX7-NEXT: flat_load_ushort v0, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 +; GFX7-NEXT: flat_load_ushort v0, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 ; GFX7-NEXT: s_mov_b32 s17, f16_user@abs32@hi ; GFX7-NEXT: s_mov_b32 s16, f16_user@abs32@lo @@ -218,8 +218,8 @@ define void @outgoing_v2f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] -; GFX7-NEXT: flat_load_dword v1, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 +; GFX7-NEXT: flat_load_dword v1, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 ; GFX7-NEXT: s_mov_b32 s17, v2f16_user@abs32@hi ; GFX7-NEXT: s_mov_b32 s16, v2f16_user@abs32@lo diff --git a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll index c4af66e922e8d..eecc9f22db415 100644 --- 
a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll @@ -10,8 +10,8 @@ define void @test_load_zext() { ; CHECK-NEXT: s_or_saveexec_b64 s[2:3], -1 ; CHECK-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[2:3] -; CHECK-NEXT: s_add_i32 s32, s32, 16 ; CHECK-NEXT: v_writelane_b32 v40, s0, 2 +; CHECK-NEXT: s_add_i32 s32, s32, 16 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: s_add_u32 s0, s0, has_spgr_args@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s1, s1, has_spgr_args@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll index 242b5e9aeaf42..153ea2957dd75 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll @@ -51,8 +51,8 @@ define void @indirect_tail_call_i32_inreg_divergent(i32 %vgpr) { ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, constant@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, constant@rel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir index cc261b0da4a8f..f4dc2aeb3e848 100644 --- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir +++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir @@ -19,8 +19,12 @@ body: | ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 @@ -52,9 +56,15 @@ body: | ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 256 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF @@ -91,8 +101,13 @@ body: | ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 @@ -123,8 +138,14 @@ body: | ; GCN-LABEL: name: wwm_csr_spill_reload ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir index 4122a530ee861..5b330e892aa34 100644 --- a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir +++ b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir @@ -18,6 +18,9 @@ body: | ; GCN-LABEL: name: vgpr_use_after_prolog_spill ; GCN: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 ; 
GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 @@ -42,6 +45,9 @@ body: | ; GCN-LABEL: name: livein_vgpr_def_after_prolog_spill ; GCN: liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 @@ -65,6 +71,9 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index d80ec6bd34945..dbfb054c7d164 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -13,8 +13,8 @@ define internal fastcc void @widget() { ; GFX90A-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[18:19] -; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s16, 
2 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: s_getpc_b64 s[16:17] ; GFX90A-NEXT: s_add_u32 s16, s16, wobble@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s17, s17, wobble@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll index 321b64510c35f..f137f429ebe26 100644 --- a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll @@ -648,12 +648,12 @@ define i32 @s_in_multiuse_A(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_mov_b32 exec_lo, s16 ; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 ; GCN-NEXT: s_xor_b32 s0, s0, s1 ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: s_mov_b32 s34, s1 @@ -693,16 +693,16 @@ define i32 @s_in_multiuse_B(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_or_saveexec_b32 s16, -1 ; GCN-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b32 exec_lo, s16 +; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s2, 4 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GCN-NEXT: s_xor_b32 s0, s0, s1 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: 
s_mov_b32 s34, s1 diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir index 1e815f76ee149..dd7d96f9d6e3c 100644 --- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -39,11 +39,43 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -66,6 +98,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0 ; @@ -74,11 +107,43 @@ body: | ; FLATSCR-NEXT: successors: %bb.2(0x40000000), 
%bb.1(0x40000000) ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr29 ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -104,6 +169,7 @@ body: | ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir index 2fac3d29cb0dc..613963403cc67 100644 --- 
a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir +++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir @@ -19,6 +19,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_1_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 @@ -44,6 +50,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_2_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -71,6 +83,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr51 ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -100,6 +118,20 @@ body: | ; GCN-LABEL: name: full_spill_a128_restore_to_v128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -129,6 +161,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, 
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 @@ -154,6 +192,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -181,6 +225,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_3_of_4 ; GCN: liveins: $agpr24, $agpr25, $agpr30, $agpr31, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -210,6 +260,20 @@ body: | ; GCN-LABEL: name: full_spill_v128_restore_to_a128 ; GCN: liveins: $agpr4, $agpr5, $agpr6, $agpr7, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir index 572a875941b22..00c0f230d141a 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -26,6 +26,8 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) @@ -44,6 +46,8 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: 
$vgpr0 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec @@ -63,6 +67,8 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) @@ -81,6 +87,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) @@ -125,6 +133,8 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5) @@ -144,6 +154,8 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec @@ -163,6 +175,8 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) @@ -181,6 +195,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5) @@ -224,6 +240,9 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -241,6 +260,9 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec @@ -259,6 +281,9 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) @@ -276,6 +301,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -318,6 +346,10 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5) @@ -336,6 +368,10 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec @@ -354,6 +390,10 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) @@ -371,6 +411,10 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 
0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5) @@ -415,6 +459,71 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + 
; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -436,6 +545,71 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + 
; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -458,6 +632,71 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 
0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -479,6 +718,71 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -527,6 +831,72 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -549,6 +919,72 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -571,6 +1007,72 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -592,6 +1094,72 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -641,6 +1209,73 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -664,6 +1299,73 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -686,6 +1388,73 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -707,6 +1476,73 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -757,6 +1593,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -778,6 +1678,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ 
$}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -800,6 +1764,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -821,6 +1849,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + 
; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -869,6 +1961,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -891,6 +2047,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -913,6 
+2133,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -934,6 +2218,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -983,6 +2331,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -1006,6 +2418,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -1028,6 +2504,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -1049,6 +2589,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -1098,6 +2702,71 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -1115,6 +2784,71 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1135,6 +2869,71 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1155,6 +2954,71 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -1202,6 +3066,71 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1223,6 +3152,71 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc @@ -1247,6 +3241,71 @@ body: | ; 
GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc @@ -1271,6 +3330,71 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1319,6 +3443,9 @@ body: | ; MUBUF: bb.0: ; 
MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: $vcc_lo = S_MOV_B32 8200 @@ -1339,6 +3466,9 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1360,6 +3490,9 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1381,6 +3514,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; VMEM-GFX8-NEXT: $vcc_lo = S_MOV_B32 8200 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir index edea344a66a3c..8862c17f8e7a5 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -17,6 +17,8 @@ body: | ; CHECK-LABEL: name: spill_v32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit $vgpr0 SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) @@ -39,6 +41,8 @@ body: | ; CHECK-LABEL: name: spill_v32_kill ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ... 
@@ -59,6 +63,8 @@ body: | ; CHECK-LABEL: name: spill_v64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit $vgpr0_vgpr1 @@ -82,6 +88,8 @@ body: | ; CHECK-LABEL: name: spill_v64_kill ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -105,6 +113,8 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub1_killed ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into 
%stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -126,6 +136,8 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub0_killed ; CHECK: liveins: $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -147,6 +159,8 @@ body: | ; CHECK-LABEL: name: spill_v128_kill ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, 
addrspace 5) @@ -166,7 +180,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_v32_undef - ; CHECK: S_NOP 0, implicit undef $vgpr0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_NOP 0, implicit undef $vgpr0 SI_SPILL_V32_SAVE undef $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) S_NOP 0, implicit undef $vgpr0 ... @@ -183,7 +199,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_v64_undef - ; CHECK: S_NOP 0, implicit undef $vgpr0_vgpr1 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_NOP 0, implicit undef $vgpr0_vgpr1 SI_SPILL_V64_SAVE undef $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) S_NOP 0, implicit undef $vgpr0_vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 14f222a8c8e17..5beb2237466a8 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -16,6 +16,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v44, s4, 2 ; GFX9-NEXT: v_mov_b32_e32 v36, v16 ; GFX9-NEXT: v_mov_b32_e32 v35, v15 ; GFX9-NEXT: v_mov_b32_e32 v34, v14 @@ -35,7 +36,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v44, s4, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 @@ -72,6 +72,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v44, s4, 2 ; GFX10-NEXT: v_mov_b32_e32 v36, v16 ; GFX10-NEXT: v_mov_b32_e32 v35, v15 ; GFX10-NEXT: v_mov_b32_e32 v34, v14 @@ -91,12 +92,12 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v44, s4, 2 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, 
extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -129,6 +130,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 ; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13 ; GFX11-NEXT: v_mov_b32_e32 v32, v12 @@ -147,12 +149,11 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -206,6 +207,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v45, s4, 2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 
offset:8 ; 4-byte Folded Spill @@ -218,7 +220,6 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: v_mov_b32_e32 v40, v12 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:44], s[4:11], s[4:7] dmask:0x1 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v45, s4, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 @@ -256,6 +257,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v45, s4, 2 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill @@ -263,18 +265,18 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v45, s4, 2 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_mov_b32_e32 v40, v16 +; GFX10-NEXT: v_writelane_b32 v45, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v40, v16 ; GFX10-NEXT: v_mov_b32_e32 v41, v15 -; GFX10-NEXT: v_writelane_b32 v45, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, v14 +; GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v43, v13 ; GFX10-NEXT: v_mov_b32_e32 v44, v12 -; 
GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -306,6 +308,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:20 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v45, s0, 2 ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12 @@ -314,16 +317,15 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_store_b32 off, v44, s33 ; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: v_writelane_b32 v45, s0, 2 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 -; GFX11-NEXT: v_dual_mov_b32 v40, v16 :: v_dual_mov_b32 v41, v15 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_writelane_b32 v45, s30, 0 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_dual_mov_b32 v40, v16 :: v_dual_mov_b32 v41, v15 ; GFX11-NEXT: v_dual_mov_b32 v42, v14 :: v_dual_mov_b32 v43, v13 -; GFX11-NEXT: v_mov_b32_e32 v44, v12 ; GFX11-NEXT: v_writelane_b32 v45, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v44, v12 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 28c6b40554bb6..fe3a6c59f1728 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -3076,13 +3076,13 @@ define void @callee_no_stack_with_call() #1 { ; GFX1032-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Spill ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_mov_b32 exec_lo, s17 -; GFX1032-NEXT: s_addk_i32 s32, 0x200 ; GFX1032-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1032-NEXT: s_addk_i32 s32, 0x200 ; GFX1032-NEXT: s_getpc_b64 s[16:17] ; GFX1032-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 -; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX1032-NEXT: v_writelane_b32 v40, s30, 0 +; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX1032-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3107,13 +3107,13 @@ define void @callee_no_stack_with_call() #1 { ; GFX1064-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, s[18:19] -; GFX1064-NEXT: s_addk_i32 s32, 0x400 ; GFX1064-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1064-NEXT: s_addk_i32 s32, 0x400 ; GFX1064-NEXT: s_getpc_b64 s[16:17] ; GFX1064-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 -; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX1064-NEXT: v_writelane_b32 v40, s30, 0 +; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX1064-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir index adba762235d8c..9b4bd18b986e2 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir @@ -26,8 +26,13 @@ body: | ; CHECK-LABEL: name: save_inactive_lanes_non_csr_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 
0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc @@ -64,8 +69,12 @@ body: | ; CHECK-LABEL: name: save_all_lanes_csr_vgpr ; CHECK: liveins: $vgpr40 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 @@ -101,8 +110,13 @@ body: | ; CHECK-LABEL: name: save_csr_sgpr_to_non_csr_vgpr ; CHECK: liveins: $sgpr20, $vgpr191, $vgpr192 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr192, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192 ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec @@ -144,8 +158,12 @@ body: | ; CHECK-LABEL: name: save_csr_sgpr_to_csr_vgpr ; CHECK: liveins: $sgpr20, $vgpr191 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; CHECK-NEXT: $vcc_lo = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr191, 0 ; CHECK-NEXT: $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191 ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0 @@ -193,11 +211,20 @@ body: | ; CHECK-LABEL: name: vgpr_and_sgpr_csr ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr49, 256 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 128 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40 @@ -250,11 +277,21 @@ body: | ; CHECK-LABEL: name: split_orig_exec ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr49, 256 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 128 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 ; CHECK-NEXT: $sgpr3 = COPY $vcc_lo @@ -300,16 +337,32 @@ body: | ; CHECK-LABEL: name: vgpr_superregs ; CHECK: liveins: $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr40, $vgpr41, $vgpr42 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 128 ; 
CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr3, 256 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr4, 384 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr5, 512 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 640 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 768 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 896 ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42 ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) @@ -360,6 +413,9 @@ body: | ; CHECK-LABEL: name: dont_restore_used_vgprs ; CHECK: liveins: $vgpr0, $vgpr20, $vgpr40 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: 
$sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40 ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 @@ -398,9 +454,16 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr1, 128 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $exec_lo ; CHECK-NEXT: V_CMPX_EQ_U32_nosdst_e64 $vgpr0, $vgpr1, implicit-def $exec, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index a42c8ac706d27..61a15747ae033 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -20,6 +20,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, 
s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -44,6 +45,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -68,6 +70,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -93,6 +96,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -115,6 +119,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -147,6 +152,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; 
DAGISEL-NEXT: s_wait_alu 0xfffe @@ -171,6 +177,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -195,6 +202,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -219,6 +227,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -240,6 +249,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -364,8 +374,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v1, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill @@ -403,8 +416,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_store_b32 off, v2, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill @@ -442,8 +458,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill @@ -482,8 +501,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, 
v40, s32 offset:12 ; 4-byte Folded Spill @@ -519,8 +541,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -908,6 +933,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -938,6 +964,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -968,6 +995,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -998,6 +1026,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; 
GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1025,6 +1054,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1069,8 +1099,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -1099,8 +1132,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -1129,8 +1165,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; 
DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -1161,8 +1200,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -1190,8 +1232,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1227,10 +1272,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x5 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9 @@ -1263,10 +1313,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GISEL-NEXT: s_xor_saveexec_b32 s34, -1 ; GISEL-NEXT: s_clause 0x5 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_mov_b32 s0, s5 @@ -1304,10 +1359,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x5 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_mov_b32_e32 v4, s4 @@ -1343,10 +1403,15 @@ define 
amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GISEL64-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GISEL64-NEXT: s_clause 0x5 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_mov_b32 s0, s5 @@ -1383,10 +1448,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x5 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1431,164 +1501,303 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 
offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 
+; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 
offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v179, s33 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: 
scratch_store_b32 off, v224, s33 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 +; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL-NEXT: v_swap_b32 v0, v1 ; DAGISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 +; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 ; DAGISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL-NEXT: 
s_addk_co_i32 s32, 0x250 -; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 ; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1767,164 +1976,303 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v18, s33 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v48, s33 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, 
v215, s33 offset:512 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v40, s0, 3 +; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 ; GISEL-NEXT: v_swap_b32 v0, v1 ; GISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL-NEXT: v_writelane_b32 v40, s4, 0 +; GISEL-NEXT: v_writelane_b32 v40, s30, 1 ; GISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL-NEXT: s_addk_co_i32 s32, 0x250 -; 
GISEL-NEXT: v_writelane_b32 v40, s30, 1 ; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2103,164 +2451,303 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL64-NEXT: ; meta instruction 
; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 
off, v81, s33 offset:232 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 
offset:312 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 
offset:392 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 
offset:472 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 
offset:552 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v40, s0, 4 +; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL64-NEXT: v_swap_b32 v0, v1 ; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 +; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -2441,164 +2928,303 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v5, s33 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, 
s33 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s33 
offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s33 
offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v145, s33 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GISEL64-NEXT: ; meta instruction ; 
GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v40, s0, 4 +; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL64-NEXT: v_mov_b32_e32 v2, v0 ; GISEL64-NEXT: v_swap_b32 v0, v1 ; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 +; GISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; GISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -2776,933 +3402,1830 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, 
s33 offset:500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v246, s33 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s33 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s33 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, s33 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s33 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s33 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s33 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s33 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s33 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s33 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s33 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s33 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s33 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s33 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s33 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s33 
offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s33 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s33 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s33 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s33 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v275*/, s33 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s33 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s33 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s33 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s33 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s33 offset:676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s33 offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s33 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s33 offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s33 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s33 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s33 offset:700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v31 /*v287*/, s33 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s33 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s33 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s33 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s33 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s33 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s33 offset:728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s33 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s33 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s33 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v297*/, s33 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v298*/, s33 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v299*/, s33 offset:752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s33 offset:756 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s33 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s33 offset:764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s33 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v304*/, s33 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s33 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s33 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s33 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s33 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s33 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s33 offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s33 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s33 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s33 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s33 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s33 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s33 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s33 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s33 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s33 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s33 
offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s33 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s33 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s33 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s33 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v325*/, s33 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s33 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s33 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s33 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s33 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s33 offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s33 offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s33 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s33 offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s33 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s33 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s33 offset:900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v81 /*v337*/, s33 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s33 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s33 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s33 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s33 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s33 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s33 offset:928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s33 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s33 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s33 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v347*/, s33 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v348*/, s33 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v349*/, s33 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s33 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s33 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s33 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s33 offset:968 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v354*/, s33 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s33 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s33 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s33 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v358*/, s33 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s33 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s33 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s33 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s33 offset:1004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s33 offset:1008 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s33 offset:1012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s33 offset:1016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s33 offset:1020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s33 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s33 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s33 offset:1032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v114 /*v370*/, s33 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s33 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s33 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s33 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s33 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s33 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s33 offset:1060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s33 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s33 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s33 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s33 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s33 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s33 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s33 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s33 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s33 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s33 offset:1100 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s33 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s33 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s33 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s33 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v391*/, s33 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s33 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s33 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s33 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s33 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v396*/, s33 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v397*/, s33 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v398*/, s33 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s33 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s33 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s33 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s33 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v403*/, s33 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s33 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s33 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s33 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s33 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s33 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s33 offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s33 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s33 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s33 offset:1204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s33 offset:1208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s33 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s33 offset:1216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s33 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s33 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s33 offset:1228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v163 /*v419*/, s33 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s33 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s33 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s33 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s33 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s33 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s33 offset:1256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s33 offset:1260 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s33 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s33 offset:1268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s33 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s33 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s33 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s33 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s33 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s33 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s33 offset:1296 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s33 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s33 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s33 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s33 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v440*/, s33 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s33 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s33 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s33 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s33 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v445*/, s33 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v446*/, s33 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v447*/, s33 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s33 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s33 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s33 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s33 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v452*/, s33 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s33 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s33 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s33 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s33 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s33 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s33 offset:1388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s33 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s33 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s33 offset:1400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s33 offset:1404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s33 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s33 offset:1412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s33 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s33 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s33 offset:1424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v212 /*v468*/, s33 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s33 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s33 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s33 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s33 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s33 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s33 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s33 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s33 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s33 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s33 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s33 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s33 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s33 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s33 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s33 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s33 offset:1492 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s33 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s33 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s33 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s33 offset:1508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v489*/, s33 offset:1512 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s33 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s33 offset:1520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s33 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s33 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v494*/, s33 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v495*/, s33 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v496*/, s33 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s33 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s33 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s33 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s33 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v501*/, s33 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s33 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s33 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s33 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s33 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s33 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s33 offset:1584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s33 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s33 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s33 offset:1620 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v517*/, s33 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s33 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s33 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s33 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s33 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s33 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s33 offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s33 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s33 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s33 offset:1660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s33 offset:1664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s33 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s33 offset:1672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s33 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s33 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s33 offset:1684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 
/*v533*/, s33 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s33 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s33 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s33 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s33 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s33 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s33 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s33 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s33 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s33 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s33 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s33 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s33 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s33 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s33 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s33 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s33 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s33 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s33 offset:1760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s33 offset:1764 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s33 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s33 offset:1772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s33 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s33 offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s33 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s33 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s33 offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s33 offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s33 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s33 offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s33 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s33 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s33 offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 
/*v566*/, s33 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s33 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s33 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s33 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s33 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s33 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s33 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s33 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s33 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s33 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s33 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s33 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s33 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s33 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s33 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s33 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s33 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s33 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s33 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s33 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s33 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s33 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s33 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s33 offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s33 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s33 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s33 offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s33 offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s33 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s33 offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s33 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s33 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s33 offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 
/*v599*/, s33 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s33 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s33 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s33 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s33 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s33 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s33 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s33 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s33 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s33 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s33 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s33 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s33 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s33 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s33 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s33 offset:2012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s33 offset:2016 ; GFX1250-DAGISEL-NEXT: s_clause 
0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s33 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s33 offset:2024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s33 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s33 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, s33 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s33 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s33 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s33 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s33 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v625*/, s33 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v626*/, s33 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v627*/, s33 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s33 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s33 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s33 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s33 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v120 /*v632*/, s33 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s33 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s33 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s33 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s33 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s33 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s33 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s33 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s33 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s33 offset:2120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s33 offset:2124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s33 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s33 offset:2132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s33 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s33 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s33 offset:2144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s33 offset:2148 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s33 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s33 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s33 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s33 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v653*/, s33 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s33 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s33 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s33 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s33 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s33 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s33 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s33 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s33 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s33 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s33 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s33 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s33 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s33 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s33 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s33 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s33 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s33 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s33 offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s33 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s33 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v674*/, s33 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v675*/, s33 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v676*/, s33 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s33 offset:2264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s33 offset:2268 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s33 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s33 offset:2276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 
/*v681*/, s33 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s33 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s33 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s33 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s33 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s33 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s33 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s33 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s33 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s33 offset:2316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s33 offset:2320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s33 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s33 offset:2328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s33 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s33 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s33 offset:2340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s33 offset:2344 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s33 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s33 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s33 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s33 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v702*/, s33 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s33 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s33 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s33 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s33 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s33 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s33 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s33 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s33 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s33 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s33 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s33 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s33 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s33 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s33 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s33 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s33 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s33 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s33 offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s33 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s33 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v723*/, s33 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v724*/, s33 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v725*/, s33 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s33 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s33 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s33 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s33 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v218 /*v730*/, s33 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s33 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s33 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s33 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s33 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s33 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s33 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s33 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s33 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s33 offset:2512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s33 offset:2516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s33 offset:2520 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s33 offset:2524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s33 offset:2528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s33 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s33 offset:2536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s33 offset:2540 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s33 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s33 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s33 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s33 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v751*/, s33 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s33 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s33 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s33 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s33 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s33 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s33 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s33 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s33 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s33 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s33 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s33 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s33 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s33 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s33 offset:2640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s33 offset:2644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s33 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s33 offset:2652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s33 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s33 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s33 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s33 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s33 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s33 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s33 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s33 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s33 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s33 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s33 offset:2696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s33 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s33 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v788*/, s33 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v789*/, s33 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v790*/, s33 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s33 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s33 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s33 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s33 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 
/*v795*/, s33 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s33 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s33 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s33 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s33 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s33 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s33 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s33 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s33 offset:2768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s33 offset:2772 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s33 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s33 offset:2780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s33 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s33 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s33 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s33 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s33 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction 
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s33 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s33 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s33 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s33 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s33 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s33 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s33 offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s33 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s33 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, s33 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, s33 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, s33 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s33 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s33 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s33 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s33 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, 
s33 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s33 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s33 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s33 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s33 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s33 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s33 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s33 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s33 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s33 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s33 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s33 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s33 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s33 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s33 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s33 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s33 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s33 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s33 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s33 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s33 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s33 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s33 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s33 offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s33 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s33 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s33 offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s33 offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s33 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s33 offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s33 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s33 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s33 offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s33 
offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s33 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s33 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s33 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s33 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s33 offset:3020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s33 offset:3024 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s33 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s33 offset:3032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s33 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s33 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s33 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s33 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s33 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s33 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s33 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s33 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s33 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s33 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s33 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s33 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v882*/, s33 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s33 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s33 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s33 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s33 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v887*/, s33 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v888*/, s33 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v889*/, s33 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s33 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s33 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s33 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s33 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v126 /*v894*/, s33 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s33 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s33 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s33 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s33 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s33 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s33 offset:3156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s33 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s33 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s33 offset:3168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s33 offset:3172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s33 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s33 offset:3180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s33 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s33 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s33 offset:3192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s33 offset:3196 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s33 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s33 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s33 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s33 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s33 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s33 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s33 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s33 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s33 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s33 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s33 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s33 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s33 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s33 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s33 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s33 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s33 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s33 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s33 offset:3272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s33 offset:3276 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v931*/, s33 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s33 offset:3284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s33 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s33 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s33 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v936*/, s33 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v937*/, s33 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v938*/, s33 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s33 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s33 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s33 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s33 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 
/*v943*/, s33 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s33 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s33 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s33 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s33 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s33 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s33 offset:3352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s33 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s33 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s33 offset:3364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s33 offset:3368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s33 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s33 offset:3376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s33 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s33 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s33 offset:3388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s33 offset:3392 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s33 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s33 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s33 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s33 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s33 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s33 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s33 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s33 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s33 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s33 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s33 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s33 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s33 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s33 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s33 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s33 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s33 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s33 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s33 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s33 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v980*/, s33 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s33 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s33 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s33 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s33 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v985*/, s33 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v986*/, s33 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v987*/, s33 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s33 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s33 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s33 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s33 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v224 /*v992*/, s33 offset:3524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s33 offset:3528 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s33 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s33 offset:3536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s33 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s33 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s33 offset:3548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s33 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s33 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s33 offset:3560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s33 offset:3564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s33 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s33 offset:3572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s33 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s33 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s33 offset:3584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s33 offset:3588 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s33 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s33 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s33 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s33 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s33 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s33 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s33 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s33 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s33 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s33 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s33 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s33 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s33 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s33 offset:3644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -3710,12 +5233,12 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1 ; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], gfx_callee@abs64 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 -; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 +; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] ; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4679,152 +6202,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v11, s32 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 
+; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, 
s32 offset:380 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v196, s32 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 @@ -4995,152 +6657,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: 
; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: ; 
meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, 
s32 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL-NEXT: s_mov_b32 
exec_lo, -1 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 @@ -5311,152 +7112,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s32 
offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 @@ -5627,152 +7567,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, 
s32 offset:52 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s32 
offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v198, s32 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL64-NEXT: ; meta instruction ; 
GISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: v_mov_b32_e32 v2, v0 @@ -5940,933 +8019,1830 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, 
s32 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v241, s32 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s32 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s32 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, s32 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s32 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s32 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s32 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s32 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s32 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s32 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s32 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s32 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s32 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s32 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s32 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s32 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s32 offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s32 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s32 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s32 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v275*/, s32 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s32 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s32 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s32 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s32 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s32 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s32 offset:676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s32 
offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s32 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s32 offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s32 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s32 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v287*/, s32 offset:700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s32 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s32 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s32 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s32 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s32 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s32 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s32 offset:728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s32 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s32 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v297*/, s32 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v298*/, s32 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v43 /*v299*/, s32 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s32 offset:752 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s32 offset:756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s32 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s32 offset:764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v304*/, s32 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s32 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s32 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s32 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s32 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s32 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s32 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s32 offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s32 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s32 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s32 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s32 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s32 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s32 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s32 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s32 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s32 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s32 offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s32 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s32 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s32 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v325*/, s32 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s32 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s32 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s32 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s32 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s32 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s32 offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s32 
offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s32 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s32 offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s32 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s32 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v337*/, s32 offset:900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s32 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s32 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s32 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s32 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s32 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s32 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s32 offset:928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s32 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s32 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v347*/, s32 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v348*/, s32 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v93 /*v349*/, s32 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s32 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s32 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s32 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s32 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v354*/, s32 offset:968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s32 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s32 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s32 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v358*/, s32 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s32 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s32 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s32 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s32 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s32 offset:1004 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s32 offset:1008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s32 offset:1012 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s32 offset:1016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s32 offset:1020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s32 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s32 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v370*/, s32 offset:1032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s32 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s32 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s32 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s32 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s32 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s32 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s32 offset:1060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s32 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s32 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s32 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s32 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s32 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s32 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s32 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s32 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s32 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s32 offset:1100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s32 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s32 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s32 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v391*/, s32 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s32 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s32 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s32 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s32 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v396*/, s32 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v397*/, s32 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v142 /*v398*/, s32 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s32 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s32 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s32 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s32 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v403*/, s32 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s32 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s32 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s32 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s32 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s32 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s32 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s32 offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s32 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s32 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s32 offset:1204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s32 offset:1208 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s32 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s32 offset:1216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s32 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s32 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v419*/, s32 offset:1228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s32 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s32 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s32 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s32 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s32 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s32 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s32 offset:1256 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s32 offset:1260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s32 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s32 offset:1268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s32 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s32 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s32 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s32 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s32 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s32 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s32 offset:1296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s32 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s32 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s32 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v440*/, s32 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s32 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s32 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s32 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s32 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v445*/, s32 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v446*/, s32 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v191 /*v447*/, s32 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s32 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s32 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s32 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s32 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v452*/, s32 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s32 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s32 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s32 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s32 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s32 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s32 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s32 offset:1388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s32 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s32 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s32 offset:1400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s32 offset:1404 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s32 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s32 offset:1412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s32 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s32 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v468*/, s32 offset:1424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s32 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s32 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s32 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s32 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s32 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s32 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s32 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s32 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s32 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s32 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s32 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s32 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s32 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s32 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s32 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s32 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s32 offset:1492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s32 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s32 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s32 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v489*/, s32 offset:1508 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s32 offset:1512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s32 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s32 offset:1520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s32 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v494*/, s32 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v495*/, s32 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 
/*v496*/, s32 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s32 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s32 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s32 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s32 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v501*/, s32 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s32 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s32 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s32 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s32 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s32 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s32 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s32 offset:1584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s32 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s32 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s32 offset:1596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v0 /*v512*/, s32 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s32 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s32 offset:1608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s32 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s32 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v517*/, s32 offset:1620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s32 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s32 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s32 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s32 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s32 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s32 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s32 offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s32 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s32 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s32 offset:1660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s32 offset:1664 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s32 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s32 offset:1672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s32 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s32 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v533*/, s32 offset:1684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s32 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s32 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s32 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s32 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s32 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s32 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s32 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s32 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s32 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s32 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s32 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s32 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s32 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s32 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s32 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s32 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s32 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s32 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s32 offset:1760 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s32 offset:1764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s32 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s32 offset:1772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s32 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s32 offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s32 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s32 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s32 offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s32 
offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s32 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s32 offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s32 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s32 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v566*/, s32 offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s32 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s32 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s32 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s32 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s32 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s32 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s32 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s32 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s32 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s32 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s32 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s32 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s32 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s32 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s32 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s32 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s32 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s32 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s32 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s32 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s32 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s32 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s32 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s32 offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s32 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s32 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s32 offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s32 
offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s32 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s32 offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s32 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s32 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v599*/, s32 offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s32 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s32 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s32 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s32 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s32 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s32 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s32 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s32 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s32 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s32 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s32 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s32 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s32 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s32 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s32 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s32 offset:2012 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s32 offset:2016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s32 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s32 offset:2024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s32 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, s32 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s32 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s32 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s32 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s32 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v625*/, s32 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v626*/, s32 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 
/*v627*/, s32 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s32 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s32 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s32 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s32 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v632*/, s32 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s32 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s32 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s32 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s32 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s32 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s32 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s32 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s32 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s32 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s32 offset:2120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s32 offset:2124 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s32 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s32 offset:2132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s32 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s32 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s32 offset:2144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s32 offset:2148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s32 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s32 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s32 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v653*/, s32 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s32 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s32 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s32 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s32 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s32 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s32 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s32 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s32 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s32 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s32 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s32 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s32 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s32 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s32 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s32 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s32 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s32 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s32 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s32 offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s32 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v674*/, s32 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v675*/, s32 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v164 /*v676*/, s32 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s32 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s32 offset:2264 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s32 offset:2268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s32 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v681*/, s32 offset:2276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s32 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s32 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s32 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s32 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s32 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s32 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s32 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s32 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s32 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s32 offset:2316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s32 offset:2320 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s32 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s32 offset:2328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s32 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s32 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s32 offset:2340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s32 offset:2344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s32 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s32 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s32 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v702*/, s32 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s32 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s32 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s32 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s32 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s32 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s32 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s32 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s32 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s32 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s32 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s32 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s32 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s32 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s32 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s32 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s32 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s32 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s32 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s32 offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s32 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v723*/, s32 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v724*/, s32 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v213 /*v725*/, s32 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s32 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s32 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s32 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s32 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v730*/, s32 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s32 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s32 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s32 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s32 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s32 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s32 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s32 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s32 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s32 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s32 offset:2512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s32 offset:2516 ; 
GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s32 offset:2520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s32 offset:2524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s32 offset:2528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s32 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s32 offset:2536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s32 offset:2540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s32 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s32 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s32 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v751*/, s32 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s32 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s32 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s32 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s32 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s32 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s32 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s32 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s32 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s32 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s32 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s32 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s32 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s32 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s32 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s32 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s32 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s32 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s32 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s32 offset:2632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s32 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s32 offset:2640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s32 offset:2644 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s32 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s32 offset:2652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s32 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s32 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s32 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s32 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s32 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s32 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s32 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s32 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s32 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s32 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s32 offset:2696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s32 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v788*/, s32 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v789*/, s32 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 
/*v790*/, s32 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s32 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s32 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s32 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s32 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v795*/, s32 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s32 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s32 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s32 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s32 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s32 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s32 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s32 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s32 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s32 offset:2768 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s32 offset:2772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s32 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction 
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s32 offset:2780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s32 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s32 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s32 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s32 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s32 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s32 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s32 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s32 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s32 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s32 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s32 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s32 offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s32 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, s32 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, s32 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, 
s32 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s32 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s32 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s32 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s32 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, s32 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s32 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s32 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s32 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s32 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s32 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s32 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s32 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s32 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s32 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s32 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s32 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s32 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s32 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s32 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s32 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s32 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s32 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s32 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s32 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s32 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s32 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s32 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s32 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s32 offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s32 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s32 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s32 offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s32 
offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s32 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s32 offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s32 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s32 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s32 offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s32 offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s32 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s32 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s32 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s32 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s32 offset:3020 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s32 offset:3024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s32 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s32 offset:3032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s32 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s32 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s32 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s32 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s32 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s32 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s32 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s32 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s32 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s32 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s32 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v882*/, s32 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s32 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s32 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s32 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s32 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v887*/, s32 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v888*/, s32 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v121 /*v889*/, s32 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s32 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s32 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s32 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s32 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v894*/, s32 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s32 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s32 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s32 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s32 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s32 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s32 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s32 offset:3156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s32 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s32 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s32 offset:3168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s32 offset:3172 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s32 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s32 offset:3180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s32 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s32 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s32 offset:3192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s32 offset:3196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s32 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s32 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s32 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s32 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s32 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s32 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s32 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s32 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s32 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s32 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s32 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s32 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s32 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s32 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s32 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s32 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s32 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s32 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s32 offset:3272 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v931*/, s32 offset:3276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s32 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s32 offset:3284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s32 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s32 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v936*/, s32 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v937*/, s32 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 
/*v938*/, s32 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s32 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s32 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s32 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s32 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v943*/, s32 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s32 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s32 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s32 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s32 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s32 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s32 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s32 offset:3352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s32 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s32 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s32 offset:3364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s32 offset:3368 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s32 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s32 offset:3376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s32 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s32 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s32 offset:3388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s32 offset:3392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s32 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s32 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s32 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s32 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s32 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s32 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s32 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s32 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s32 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s32 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s32 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s32 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s32 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s32 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s32 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s32 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s32 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s32 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s32 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v980*/, s32 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s32 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s32 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s32 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s32 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v985*/, s32 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v986*/, s32 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v219 /*v987*/, s32 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s32 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s32 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s32 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s32 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v992*/, s32 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s32 offset:3524 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s32 offset:3528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s32 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s32 offset:3536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s32 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s32 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s32 offset:3548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s32 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s32 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s32 offset:3560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s32 offset:3564 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s32 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s32 offset:3572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s32 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s32 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s32 offset:3584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s32 offset:3588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s32 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s32 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s32 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s32 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s32 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s32 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s32 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s32 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s32 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s32 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s32 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s32 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s32 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s32 offset:3644 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -7884,165 +10860,306 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, 
s33 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v243, s33 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_clause 0x2 ; DAGISEL-NEXT: scratch_store_b32 off, v42, s33 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 +; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo ; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 -; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 ; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -8225,165 +11342,306 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v29, s33 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v67, s33 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, 
s33 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v242, s33 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_clause 0x2 ; GISEL-NEXT: scratch_store_b32 off, v42, s33 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v42, s0, 3 +; GISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi ; GISEL-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 -; GISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GISEL-NEXT: v_writelane_b32 v42, s30, 1 ; GISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -8566,167 +11824,308 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; 
DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v54, s33 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, 
v98, s33 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, 
v134, s33 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, 
s33 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, 
s33 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_clause 0x2 ; DAGISEL64-NEXT: scratch_store_b32 off, v42, s33 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 
off, v41, s33 offset:168 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v42, s0, 4 +; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0 ; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi ; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo ; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 -; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 ; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1 +; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 ; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2 ; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -8910,167 +12309,308 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, 
s33 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 
off, v242, s33 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_clause 0x2 ; GISEL64-NEXT: scratch_store_b32 off, v42, s33 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v42, s0, 4 +; GISEL64-NEXT: v_writelane_b32 v42, s4, 0 ; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo ; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi ; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL64-NEXT: v_mov_b32_e32 v40, v8 -; GISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; GISEL64-NEXT: v_mov_b32_e32 v41, v9 ; GISEL64-NEXT: v_writelane_b32 v42, s5, 1 +; GISEL64-NEXT: v_mov_b32_e32 v41, v9 ; GISEL64-NEXT: v_writelane_b32 v42, s30, 2 ; GISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -9251,948 +12791,1847 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s33 
offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s33 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s33 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, 
s33 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s33 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s33 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s33 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s33 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s33 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s33 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s33 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s33 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s33 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s33 offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s33 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s33 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s33 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s33 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s33 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s33 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v19 /*v275*/, s33 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s33 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s33 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s33 offset:676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s33 offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s33 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s33 offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s33 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s33 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s33 offset:700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s33 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s33 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v287*/, s33 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s33 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s33 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s33 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s33 offset:728 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s33 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s33 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s33 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s33 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s33 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v297*/, s33 offset:752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v298*/, s33 offset:756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v299*/, s33 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s33 offset:764 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s33 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s33 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s33 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v304*/, s33 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s33 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s33 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s33 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s33 
offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s33 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s33 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s33 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s33 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s33 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s33 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s33 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s33 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s33 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s33 offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s33 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s33 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s33 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s33 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s33 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s33 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v69 /*v325*/, s33 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s33 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s33 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s33 offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s33 offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s33 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s33 offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s33 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s33 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s33 offset:900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s33 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s33 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v337*/, s33 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s33 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s33 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s33 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s33 offset:928 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s33 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s33 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s33 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s33 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s33 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v347*/, s33 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v348*/, s33 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v349*/, s33 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s33 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s33 offset:968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s33 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s33 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v354*/, s33 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s33 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s33 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s33 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 
/*v358*/, s33 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s33 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s33 offset:1004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s33 offset:1008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s33 offset:1012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s33 offset:1016 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s33 offset:1020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s33 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s33 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s33 offset:1032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s33 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s33 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v370*/, s33 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s33 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s33 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s33 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s33 offset:1060 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s33 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s33 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s33 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s33 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s33 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s33 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s33 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s33 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s33 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s33 offset:1100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s33 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s33 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s33 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s33 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s33 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s33 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v135 /*v391*/, s33 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s33 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s33 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s33 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s33 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v396*/, s33 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v397*/, s33 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v398*/, s33 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s33 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s33 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s33 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s33 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v403*/, s33 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s33 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s33 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s33 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s33 
offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s33 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s33 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s33 offset:1204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s33 offset:1208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s33 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s33 offset:1216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s33 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s33 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s33 offset:1228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s33 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s33 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v419*/, s33 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s33 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s33 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s33 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s33 offset:1256 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s33 offset:1260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s33 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s33 offset:1268 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s33 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s33 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s33 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s33 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s33 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s33 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s33 offset:1296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s33 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s33 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s33 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s33 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s33 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s33 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v184 /*v440*/, s33 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s33 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s33 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s33 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s33 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v445*/, s33 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v446*/, s33 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v447*/, s33 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s33 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s33 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s33 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s33 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v452*/, s33 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s33 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s33 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s33 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s33 offset:1388 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s33 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s33 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s33 offset:1400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s33 offset:1404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s33 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s33 offset:1412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s33 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s33 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s33 offset:1424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s33 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s33 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v468*/, s33 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s33 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s33 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s33 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s33 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s33 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s33 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s33 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s33 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s33 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s33 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s33 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s33 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s33 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s33 offset:1492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s33 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s33 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s33 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s33 offset:1508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s33 offset:1512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s33 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v233 /*v489*/, s33 offset:1520 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s33 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s33 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s33 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s33 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v494*/, s33 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v495*/, s33 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v496*/, s33 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s33 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s33 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s33 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s33 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v501*/, s33 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s33 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s33 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s33 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s33 offset:1584 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s33 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s33 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s33 offset:1596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s33 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s33 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v517*/, s33 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s33 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s33 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s33 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s33 
offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s33 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s33 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s33 offset:1660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s33 offset:1664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s33 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s33 offset:1672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s33 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s33 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s33 offset:1684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s33 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s33 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v533*/, s33 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s33 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s33 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s33 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s33 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s33 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s33 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s33 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s33 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s33 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s33 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s33 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s33 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s33 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s33 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s33 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s33 offset:1760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s33 offset:1764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s33 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s33 offset:1772 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s33 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s33 
offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s33 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s33 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s33 offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s33 offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s33 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s33 offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s33 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s33 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s33 offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s33 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s33 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v566*/, s33 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s33 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s33 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s33 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s33 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s33 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s33 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s33 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s33 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s33 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s33 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s33 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s33 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s33 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s33 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s33 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s33 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s33 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s33 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s33 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s33 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s33 
offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s33 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s33 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s33 offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s33 offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s33 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s33 offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s33 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s33 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s33 offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s33 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s33 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v599*/, s33 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s33 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s33 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s33 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s33 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s33 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s33 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s33 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s33 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s33 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s33 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s33 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s33 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s33 offset:2012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s33 offset:2016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s33 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s33 offset:2024 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s33 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s33 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s33 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s33 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, 
s33 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s33 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s33 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s33 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s33 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v625*/, s33 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v626*/, s33 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v627*/, s33 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s33 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s33 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s33 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s33 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v632*/, s33 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s33 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s33 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s33 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s33 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s33 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s33 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s33 offset:2120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s33 offset:2124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s33 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s33 offset:2132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s33 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s33 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s33 offset:2144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s33 offset:2148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s33 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s33 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s33 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s33 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s33 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s33 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v141 /*v653*/, s33 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s33 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s33 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s33 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s33 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s33 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s33 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s33 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s33 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s33 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s33 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s33 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s33 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s33 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s33 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s33 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s33 
offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s33 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s33 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s33 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s33 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v674*/, s33 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v675*/, s33 offset:2264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v676*/, s33 offset:2268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s33 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s33 offset:2276 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s33 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s33 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v681*/, s33 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s33 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s33 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s33 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s33 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s33 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s33 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s33 offset:2316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s33 offset:2320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s33 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s33 offset:2328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s33 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s33 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s33 offset:2340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s33 offset:2344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s33 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s33 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s33 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s33 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s33 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s33 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v190 /*v702*/, s33 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s33 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s33 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s33 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s33 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s33 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s33 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s33 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s33 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s33 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s33 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s33 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s33 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s33 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s33 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s33 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s33 
offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s33 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s33 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s33 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s33 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v723*/, s33 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v724*/, s33 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v725*/, s33 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s33 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s33 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s33 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s33 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v730*/, s33 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s33 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s33 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s33 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s33 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s33 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s33 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s33 offset:2512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s33 offset:2516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s33 offset:2520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s33 offset:2524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s33 offset:2528 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s33 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s33 offset:2536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s33 offset:2540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s33 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s33 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s33 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s33 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s33 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s33 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v239 /*v751*/, s33 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s33 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s33 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s33 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s33 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s33 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s33 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s33 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s33 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s33 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s33 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s33 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s33 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s33 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2632 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s33 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s33 offset:2652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s33 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s33 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s33 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s33 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s33 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s33 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s33 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s33 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s33 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s33 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s33 offset:2696 
+; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s33 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s33 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s33 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s33 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v788*/, s33 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v789*/, s33 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v790*/, s33 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s33 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s33 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s33 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s33 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v795*/, s33 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s33 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s33 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s33 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s33 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s33 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s33 offset:2768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s33 offset:2772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s33 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s33 offset:2780 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s33 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s33 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s33 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s33 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s33 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s33 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s33 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s33 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s33 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s33 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s33 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s33 
offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s33 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s33 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s33 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s33 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, s33 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, s33 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, s33 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s33 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s33 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s33 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s33 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, s33 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s33 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s33 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s33 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s33 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s33 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s33 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s33 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s33 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s33 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s33 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s33 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s33 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s33 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s33 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s33 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s33 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s33 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s33 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s33 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s33 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s33 
offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s33 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s33 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s33 offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s33 offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s33 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s33 offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s33 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s33 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s33 offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s33 offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s33 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s33 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s33 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s33 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s33 offset:3020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s33 offset:3024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s33 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s33 offset:3032 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s33 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s33 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s33 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s33 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s33 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s33 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s33 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s33 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s33 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s33 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s33 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s33 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s33 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s33 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 
/*v882*/, s33 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s33 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s33 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s33 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s33 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v887*/, s33 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v888*/, s33 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v889*/, s33 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s33 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s33 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s33 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s33 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v894*/, s33 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s33 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s33 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s33 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s33 offset:3156 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s33 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s33 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s33 offset:3168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s33 offset:3172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s33 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s33 offset:3180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s33 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s33 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s33 offset:3192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s33 offset:3196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s33 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s33 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s33 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s33 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s33 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s33 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s33 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s33 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s33 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s33 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s33 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s33 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s33 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s33 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s33 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s33 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s33 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s33 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s33 offset:3272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s33 offset:3276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s33 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s33 offset:3284 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 
/*v931*/, s33 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s33 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s33 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s33 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s33 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v936*/, s33 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v937*/, s33 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v938*/, s33 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s33 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s33 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s33 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s33 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v943*/, s33 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s33 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s33 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s33 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s33 offset:3352 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s33 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s33 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s33 offset:3364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s33 offset:3368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s33 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s33 offset:3376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s33 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s33 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s33 offset:3388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s33 offset:3392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s33 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s33 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s33 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s33 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s33 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s33 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s33 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s33 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s33 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s33 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s33 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s33 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s33 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s33 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s33 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s33 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s33 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s33 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s33 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s33 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s33 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s33 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v212 /*v980*/, s33 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s33 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s33 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s33 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s33 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v985*/, s33 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v986*/, s33 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v987*/, s33 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s33 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s33 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s33 offset:3524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s33 offset:3528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v992*/, s33 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s33 offset:3536 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s33 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s33 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s33 offset:3548 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s33 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s33 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s33 offset:3560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s33 offset:3564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s33 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s33 offset:3572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s33 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s33 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s33 offset:3584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s33 offset:3588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s33 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s33 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s33 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s33 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s33 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s33 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s33 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s33 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s33 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s33 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s33 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s33 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s33 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s33 offset:3644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s33 offset:3648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s33 offset:3652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_clause 0x2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42, s33 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x2 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 ; 
GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] diff --git a/llvm/test/DebugInfo/AMDGPU/cfi.ll b/llvm/test/DebugInfo/AMDGPU/cfi.ll index 686cf4b654e35..c7c23bc632fe7 100644 --- a/llvm/test/DebugInfo/AMDGPU/cfi.ll +++ b/llvm/test/DebugInfo/AMDGPU/cfi.ll @@ -15,6 +15,9 @@ ; CHECK-EMPTY: ; CHECK: 00000010 {{[0-9]+}} 00000000 FDE cie=00000000 pc=00000000...{{[0-9]+}} ; CHECK-NEXT: Format: DWARF32 +; CHECK-NEXT: DW_CFA_LLVM_def_aspace_cfa: SGPR32 +0 in addrspace6 +; CHECK-NEXT: DW_CFA_expression: PC_REG DW_OP_regx SGPR30, DW_OP_piece 0x4, DW_OP_regx SGPR31, DW_OP_piece 0x4 +; CHECK-NEXT: DW_CFA_nop: ; CHECK-EMPTY: ; CHECK: .eh_frame contents: ; CHECK-NOT: CIE diff --git a/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll b/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll index 1f13282a1f04c..a87ce1c79055a 100644 --- a/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll +++ b/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll @@ -4,6 +4,8 @@ ; Verify that the debug locations in this function are correct, in particular ; that the location for %cast doesn't appear in the block of %lab. 
+ + define void @_Z12lane_pc_testj() #0 !dbg !9 { ; GCN-LABEL: _Z12lane_pc_testj: ; GCN: .Lfunc_begin0: @@ -12,6 +14,16 @@ define void @_Z12lane_pc_testj() #0 !dbg !9 { ; GCN-NEXT: .cfi_sections .debug_frame ; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 1536 +; GCN-NEXT: .cfi_undefined 1537 +; GCN-NEXT: .cfi_undefined 1538 +; GCN-NEXT: .cfi_undefined 36 +; GCN-NEXT: .cfi_undefined 37 +; GCN-NEXT: .cfi_undefined 38 +; GCN-NEXT: .cfi_undefined 39 +; GCN-NEXT: .cfi_undefined 40 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: ; %bb.1: ; %lab ; GCN-NEXT: s_mov_b64 s[4:5], 0 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected index a8c2531117f42..0a85133152679 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected @@ -69,9 +69,22 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .type .Lcheck_boundaries$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s8, s33 +; CHECK-NEXT: .cfi_register 65, 40 ; CHECK-NEXT: s_mov_b32 s33, s32 +; 
CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 @@ -102,6 +115,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -111,9 +125,21 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .type .Lmain$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 +; CHECK-NEXT: .cfi_register 65, 38 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] @@ -139,6 +165,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected index 34530f2f632e2..df156b1b2e1b4 
100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected @@ -10,9 +10,22 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: .type .Lcheck_boundaries$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s8, s33 +; CHECK-NEXT: .cfi_register 65, 40 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 @@ -43,6 +56,7 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -88,9 +102,21 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: .type .Lmain$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 ; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 +; CHECK-NEXT: .cfi_register 65, 38 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] @@ -116,6 +142,7 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] From 77239134ccba6c86c51b61aab6cdfb59bf87e54a Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Wed, 25 Jun 2025 11:06:31 -0400 Subject: [PATCH 5/5] [AMDGPU] Implement CFI for CSR spills Introduce new SPILL pseudos so that CFI is generated only for CSR spills, and so that the generated information is accurate at the ISA-instruction level. Other targets either generate slightly incorrect information or rely on conventions for how spills are placed within the entry block. The approach in this change produces larger unwind tables, with the increased size being spent on additional DW_CFA_advance_location instructions needed to describe the unwinding accurately.
Co-authored-by: Scott Linder Co-authored-by: Venkata Ramanaiah Nalamothu --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 99 +- llvm/lib/Target/AMDGPU/SIFrameLowering.h | 22 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 137 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 19 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 18 + llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 90 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 223 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 15 +- .../CodeGen/AMDGPU/GlobalISel/assert-align.ll | 6 +- .../GlobalISel/call-outgoing-stack-args.ll | 28 +- .../CodeGen/AMDGPU/GlobalISel/localizer.ll | 8 +- .../test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll | 8 +- .../CodeGen/AMDGPU/a-v-global-atomicrmw.ll | 8 +- .../abi-attribute-hints-undefined-behavior.ll | 6 +- .../AMDGPU/accvgpr-spill-scc-clobber.mir | 2688 +++++ .../AMDGPU/agpr-copy-no-free-registers.ll | 7 +- .../CodeGen/AMDGPU/amdgcn-call-whole-wave.ll | 28 +- .../CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll | 8970 +++++++++++------ .../CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll | 10 +- .../CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll | 52 +- .../CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll | 2260 ++--- .../CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll | 228 +- .../CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll | 642 +- .../CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll | 706 +- .../CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll | 864 +- .../CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll | 1140 ++- .../CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll | 1492 ++- .../CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll | 1842 ++-- .../AMDGPU/amdgpu-cs-chain-preserve-cc.ll | 2 + .../amdgpu-simplify-libcall-pow-codegen.ll | 308 +- ...tor-flatscratchinit-undefined-behavior2.ll | 37 +- .../AMDGPU/av_spill_cross_bb_usage.mir | 12 + llvm/test/CodeGen/AMDGPU/bf16.ll | 422 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 156 +- llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 288 +- .../CodeGen/AMDGPU/call-argument-types.ll | 136 +- .../AMDGPU/call-graph-register-usage.ll | 2 +- 
.../AMDGPU/call-preserved-registers.ll | 120 +- .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 192 +- .../callee-special-input-vgprs-packed.ll | 46 +- .../AMDGPU/callee-special-input-vgprs.ll | 46 +- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 24 +- llvm/test/CodeGen/AMDGPU/debug-frame.ll | 506 +- .../AMDGPU/dwarf-multi-register-use-crash.ll | 82 +- .../dynamic-vgpr-reserve-stack-for-cwsr.ll | 18 +- .../eliminate-frame-index-s-mov-b32.mir | 96 + .../fix-frame-reg-in-custom-csr-spills.ll | 10 +- llvm/test/CodeGen/AMDGPU/frame-index.mir | 96 + ...frame-setup-without-sgpr-to-vgpr-spills.ll | 29 +- .../CodeGen/AMDGPU/function-args-inreg.ll | 29 +- .../CodeGen/AMDGPU/gfx-call-non-gfx-func.ll | 148 +- .../AMDGPU/gfx-callable-argument-types.ll | 4549 +++++---- .../gfx-callable-preserved-registers.ll | 239 +- .../AMDGPU/gfx-callable-return-types.ll | 222 +- llvm/test/CodeGen/AMDGPU/global-alias.ll | 2 +- .../identical-subrange-spill-infloop.ll | 92 +- llvm/test/CodeGen/AMDGPU/indirect-call.ll | 1104 +- .../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 7 +- .../CodeGen/AMDGPU/insert-waitcnts-crash.ll | 17 +- .../AMDGPU/llvm.amdgcn.readfirstlane.ll | 32 +- llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 12 +- llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 12 +- .../materialize-frame-index-sgpr.gfx10.ll | 100 +- .../AMDGPU/materialize-frame-index-sgpr.ll | 1651 +-- llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll | 28 +- .../CodeGen/AMDGPU/memintrinsic-unroll.ll | 6 +- llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll | 28 +- .../CodeGen/AMDGPU/mul24-pass-ordering.ll | 26 +- .../AMDGPU/need-fp-from-vgpr-spills.ll | 6 +- llvm/test/CodeGen/AMDGPU/nested-calls.ll | 12 +- .../AMDGPU/no-source-locations-in-prologue.ll | 3 +- .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir | 1 + .../AMDGPU/pei-vgpr-block-spill-csr.mir | 320 + .../AMDGPU/preserve-wwm-copy-dst-reg.ll | 33 +- llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll | 13 +- .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 257 +- 
.../AMDGPU/sgpr-spills-split-regalloc.ll | 27 +- .../AMDGPU/shufflevector.v2i64.v8i64.ll | 442 +- .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 21 +- .../CodeGen/AMDGPU/si-lower-sgpr-spills.mir | 5 + llvm/test/CodeGen/AMDGPU/sibling-call.ll | 246 +- .../spill-partial-csr-sgpr-live-ins.mir | 5 + .../AMDGPU/spill-sgpr-csr-live-ins.mir | 1 + .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 16 + llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll | 6 +- .../spill_more_than_wavesize_csr_sgprs.ll | 2 +- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 18 +- .../CodeGen/AMDGPU/stacksave_stackrestore.ll | 22 +- .../AMDGPU/strictfp_f16_abi_promote.ll | 54 +- .../CodeGen/AMDGPU/swdev504645-global-fold.ll | 7 +- .../AMDGPU/tail-call-inreg-arguments.error.ll | 10 +- .../AMDGPU/tuple-allocation-failure.ll | 4 +- ...unfold-masked-merge-scalar-variablemask.ll | 38 +- .../unspill-vgpr-after-rewrite-vgpr-mfma.ll | 18 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 168 +- .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 81 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 12 +- .../CodeGen/AMDGPU/whole-wave-functions.ll | 104 +- .../AMDGPU/whole-wave-register-copy.ll | 4 +- .../AMDGPU/whole-wave-register-spill.ll | 6 +- .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 8 +- 101 files changed, 22312 insertions(+), 12206 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 5a0b1afbdfdff..bbde3c49f64c6 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -2244,17 +2244,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( return true; } +static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) { + if (MBB.isLiveIn(*R)) { + return true; + } + } + return false; +} + bool SIFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 
ArrayRef CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const GCNSubtarget &ST = MF->getSubtarget(); - if (!ST.useVGPRBlockOpsForCSR()) - return false; + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *SITRI = static_cast(TRI); + + if (!ST.useVGPRBlockOpsForCSR()) { + for (const CalleeSavedInfo &CS : CSI) { + // Insert the spill to the stack frame. + unsigned Reg = CS.getReg(); + + if (CS.isSpilledToReg()) { + BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), + CS.getDstReg()) + .addReg(Reg, getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( + Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32); + // If this value was already livein, we probably have a direct use of + // the incoming register value, so don't kill at the spill point. This + // happens since we pass some special inputs (workgroup IDs) in the + // callee saved range. + const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI); + TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(), + RC, TRI); + } + } + return true; + } MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const TargetRegisterClass *BlockRegClass = @@ -2278,10 +2310,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters( FrameInfo.getObjectAlign(FrameIndex)); BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE)) + TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE)) .addReg(Reg, getKillRegState(false)) .addFrameIndex(FrameIndex) - .addReg(MFI->getStackPtrOffsetReg()) + .addReg(FuncInfo->getStackPtrOffsetReg()) .addImm(0) .addImm(Mask) .addMemOperand(MMO); @@ -2467,6 +2499,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB, .setMIFlag(flag); } +MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill( + MachineBasicBlock 
&MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register Reg, const Register RegCopy) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + + unsigned MaskReg = MCRI.getDwarfRegNum( + ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false); + auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask( + nullptr, MCRI.getDwarfRegNum(Reg, false), + MCRI.getDwarfRegNum(RegCopy, false), VGPRLaneBitSize, MaskReg, + ST.getWavefrontSize()); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const Register SGPR, const Register VGPR, @@ -2515,6 +2563,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); } +MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned SGPR, int64_t Offset) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + return buildCFI(MBB, MBBI, DL, + llvm::MCCFIInstruction::createOffset( + nullptr, MCRI.getDwarfRegNum(SGPR, false), Offset)); +} + +MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, int64_t Offset) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + + int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false); + assert(DwarfVGPR != -1); + + unsigned MaskReg = MCRI.getDwarfRegNum( + ST.isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC, false); + auto CFIInst = MCCFIInstruction::createLLVMVectorOffset( + nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg, ST.getWavefrontSize(), + Offset); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const Register Reg, const Register SGPRPair) const { @@ -2535,3 +2611,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill( nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize); return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); } + +MachineInstr * +SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false); + auto CFIInst = MCCFIInstruction::createSameValue(nullptr, DwarfReg); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 20f608f2dfc24..2b716db0b7a22 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -120,6 +120,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering { const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const; + /// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another + /// VGPR/AGPR \p RegCopy and build a MachineInstr around it. + MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const Register Reg, + const Register RegCopy) const; /// Create a CFI index describing a spill of an SGPR to a single lane of /// a VGPR and build a MachineInstr around it. 
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, @@ -134,10 +141,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering { MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register SGPR, ArrayRef VGPRSpills) const; + /// Create a CFI index describing a spill of a SGPR to VMEM and + /// build a MachineInstr around it. + MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned SGPR, + int64_t Offset) const; + /// Create a CFI index describing a spill of a VGPR to VMEM and + /// build a MachineInstr around it. + MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, + int64_t Offset) const; MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, Register SGPRPair) const; + MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg) const; // Returns true if the function may need to reserve space on the stack for the // CWSR trap handler. bool mayReserveScratchForCWSR(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d930a21c2d7f5..a097e721d142f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1530,22 +1530,26 @@ SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); } -static unsigned getSGPRSpillSaveOpcode(unsigned Size) { +static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_S32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE; case 8: - return AMDGPU::SI_SPILL_S64_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE; case 12: - return AMDGPU::SI_SPILL_S96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE; case 16: - return AMDGPU::SI_SPILL_S128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE + : AMDGPU::SI_SPILL_S128_SAVE; case 20: - return AMDGPU::SI_SPILL_S160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE + : AMDGPU::SI_SPILL_S160_SAVE; case 24: - return AMDGPU::SI_SPILL_S192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S192_CFI_SAVE + : AMDGPU::SI_SPILL_S192_SAVE; case 28: - return AMDGPU::SI_SPILL_S224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S224_CFI_SAVE + : AMDGPU::SI_SPILL_S224_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 36: @@ -1557,69 +1561,90 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) { case 48: return AMDGPU::SI_SPILL_S384_SAVE; case 64: - return AMDGPU::SI_SPILL_S512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE + : AMDGPU::SI_SPILL_S512_SAVE; case 128: - return AMDGPU::SI_SPILL_S1024_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S1024_CFI_SAVE + : AMDGPU::SI_SPILL_S1024_SAVE; default: llvm_unreachable("unknown register size"); } } -static unsigned getVGPRSpillSaveOpcode(unsigned Size) { +static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 2: return AMDGPU::SI_SPILL_V16_SAVE; case 4: - return AMDGPU::SI_SPILL_V32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE; case 8: - return AMDGPU::SI_SPILL_V64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE; case 12: - return AMDGPU::SI_SPILL_V96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE; case 16: - return AMDGPU::SI_SPILL_V128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE + : AMDGPU::SI_SPILL_V128_SAVE; case 20: - return AMDGPU::SI_SPILL_V160_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_V160_CFI_SAVE + : AMDGPU::SI_SPILL_V160_SAVE; case 24: - return AMDGPU::SI_SPILL_V192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V192_CFI_SAVE + : AMDGPU::SI_SPILL_V192_SAVE; case 28: - return AMDGPU::SI_SPILL_V224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V224_CFI_SAVE + : AMDGPU::SI_SPILL_V224_SAVE; case 32: - return AMDGPU::SI_SPILL_V256_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE + : AMDGPU::SI_SPILL_V256_SAVE; case 36: - return AMDGPU::SI_SPILL_V288_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V288_CFI_SAVE + : AMDGPU::SI_SPILL_V288_SAVE; case 40: - return AMDGPU::SI_SPILL_V320_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V320_CFI_SAVE + : AMDGPU::SI_SPILL_V320_SAVE; case 44: - return AMDGPU::SI_SPILL_V352_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V352_CFI_SAVE + : AMDGPU::SI_SPILL_V352_SAVE; case 48: - return AMDGPU::SI_SPILL_V384_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V384_CFI_SAVE + : AMDGPU::SI_SPILL_V384_SAVE; case 64: - return AMDGPU::SI_SPILL_V512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V512_CFI_SAVE + : AMDGPU::SI_SPILL_V512_SAVE; case 128: - return AMDGPU::SI_SPILL_V1024_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V1024_CFI_SAVE + : AMDGPU::SI_SPILL_V1024_SAVE; default: llvm_unreachable("unknown register size"); } } -static unsigned getAVSpillSaveOpcode(unsigned Size) { +static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_AV32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV32_CFI_SAVE + : AMDGPU::SI_SPILL_AV32_SAVE; case 8: - return AMDGPU::SI_SPILL_AV64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV64_CFI_SAVE + : AMDGPU::SI_SPILL_AV64_SAVE; case 12: - return AMDGPU::SI_SPILL_AV96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV96_CFI_SAVE + : AMDGPU::SI_SPILL_AV96_SAVE; case 16: - return AMDGPU::SI_SPILL_AV128_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_AV128_CFI_SAVE + : AMDGPU::SI_SPILL_AV128_SAVE; case 20: - return AMDGPU::SI_SPILL_AV160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV160_CFI_SAVE + : AMDGPU::SI_SPILL_AV160_SAVE; case 24: - return AMDGPU::SI_SPILL_AV192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV192_CFI_SAVE + : AMDGPU::SI_SPILL_AV192_SAVE; case 28: - return AMDGPU::SI_SPILL_AV224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV224_CFI_SAVE + : AMDGPU::SI_SPILL_AV224_SAVE; case 32: - return AMDGPU::SI_SPILL_AV256_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV256_CFI_SAVE + : AMDGPU::SI_SPILL_AV256_SAVE; case 36: return AMDGPU::SI_SPILL_AV288_SAVE; case 40: @@ -1629,9 +1654,11 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) { case 48: return AMDGPU::SI_SPILL_AV384_SAVE; case 64: - return AMDGPU::SI_SPILL_AV512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV512_CFI_SAVE + : AMDGPU::SI_SPILL_AV512_SAVE; case 128: - return AMDGPU::SI_SPILL_AV1024_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV1024_CFI_SAVE + : AMDGPU::SI_SPILL_AV1024_SAVE; default: llvm_unreachable("unknown register size"); } @@ -1651,7 +1678,7 @@ static unsigned getWWMRegSpillSaveOpcode(unsigned Size, unsigned SIInstrInfo::getVectorRegSpillSaveOpcode( Register Reg, const TargetRegisterClass *RC, unsigned Size, - const SIMachineFunctionInfo &MFI) const { + const SIMachineFunctionInfo &MFI, bool NeedsCFI) const { bool IsVectorSuperClass = RI.isVectorSuperClass(RC); // Choose the right opcode if spilling a WWM register. 
@@ -1660,16 +1687,16 @@ unsigned SIInstrInfo::getVectorRegSpillSaveOpcode( // TODO: Check if AGPRs are available if (ST.hasMAIInsts()) - return getAVSpillSaveOpcode(Size); + return getAVSpillSaveOpcode(Size, NeedsCFI); - return getVGPRSpillSaveOpcode(Size); + return getVGPRSpillSaveOpcode(Size, NeedsCFI); } -void SIInstrInfo::storeRegToStackSlot( +void SIInstrInfo::storeRegToStackSlotImpl( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, - MachineInstr::MIFlag Flags) const { + const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags, + bool NeedsCFI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); @@ -1691,7 +1718,8 @@ void SIInstrInfo::storeRegToStackSlot( // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); + const MCInstrDesc &OpDesc = + get(getSGPRSpillSaveOpcode(SpillSize, NeedsCFI)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. @@ -1710,8 +1738,8 @@ void SIInstrInfo::storeRegToStackSlot( return; } - unsigned Opcode = - getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, SpillSize, *MFI); + unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? 
VReg : SrcReg, RC, + SpillSize, *MFI, NeedsCFI); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) @@ -1722,6 +1750,25 @@ void SIInstrInfo::storeRegToStackSlot( .addMemOperand(MMO); } +void SIInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg, + MachineInstr::MIFlag Flags) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, VReg, + Flags, false); +} + +void SIInstrInfo::storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, + Register(), MachineInstr::NoFlags, true); +} + static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5fdeddaf3f736..9c0a80bbcecda 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -293,13 +293,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const; +private: + void storeRegToStackSlotImpl(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg, + MachineInstr::MIFlag Flags, bool NeedsCFI) const; + +public: + void storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override; unsigned 
getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, - const SIMachineFunctionInfo &MFI) const; + const SIMachineFunctionInfo &MFI, + bool NeedsCFI) const; unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, @@ -703,6 +719,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { static bool isBlockLoadStore(uint16_t Opcode) { switch (Opcode) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: case AMDGPU::SCRATCH_STORE_BLOCK_SADDR: case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6f1feb1dc2996..3498f18467466 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1086,6 +1086,11 @@ multiclass SI_SPILL_SGPR { let mayLoad = 0; } + def _CFI_SAVE : PseudoInstSI<(outs), (ins sgpr_class:$data, i32imm:$addr)> { + let mayStore = 1; + let mayLoad = 0; + } + def _RESTORE : PseudoInstSI < (outs sgpr_class:$data), (ins i32imm:$addr)> { @@ -1159,6 +1164,19 @@ multiclass SI_SPILL_VGPR { + let mayStore = 1; + let mayLoad = 0; + // (2 * 4) + (8 * num_subregs) bytes maximum + int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), !add(UsesTmp, 3)), 8); + // Size field is unsigned char and cannot fit more. 
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252); + } + def _RESTORE : VPseudoInstSI < (outs vgpr_class:$vdata), !con( diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 40eeeb8a8630d..62386da94d854 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -100,63 +100,25 @@ INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE, char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID; -static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, - const TargetRegisterInfo *TRI) { - for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) { - if (MBB.isLiveIn(*R)) { - return true; - } - } - return false; -} - /// Insert spill code for the callee-saved registers used in the function. -static void insertCSRSaves(MachineBasicBlock &SaveBlock, +static void insertCSRSaves(const GCNSubtarget &ST, MachineBasicBlock &SaveBlock, ArrayRef CSI, SlotIndexes *Indexes, LiveIntervals *LIS) { - MachineFunction &MF = *SaveBlock.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *RI = ST.getRegisterInfo(); - + const TargetFrameLowering *TFI = ST.getFrameLowering(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); - if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { - for (const CalleeSavedInfo &CS : CSI) { - // Insert the spill to the stack frame. - MCRegister Reg = CS.getReg(); - - MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( - Reg, Reg == RI->getReturnAddressReg(MF) ? 
MVT::i64 : MVT::i32); - - // If this value was already livein, we probably have a direct use of the - // incoming register value, so don't kill at the spill point. This happens - // since we pass some special inputs (workgroup IDs) in the callee saved - // range. - const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI); - TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), - RC, TRI, Register()); - - if (Indexes) { - assert(std::distance(MIS.begin(), I) == 1); - MachineInstr &Inst = *std::prev(I); - Indexes->insertMachineInstrInMaps(Inst); - } - - if (LIS) - LIS->removeAllRegUnitsForPhysReg(Reg); - } - } else { - // TFI doesn't update Indexes and LIS, so we have to do it separately. - if (Indexes) - Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I); - - if (LIS) - for (const CalleeSavedInfo &CS : CSI) - LIS->removeAllRegUnitsForPhysReg(CS.getReg()); - } + MachineInstrSpan MIS(I, &SaveBlock); + bool Success = TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI); + assert(Success && "spillCalleeSavedRegisters should always succeed"); + (void)Success; + + // TFI doesn't update Indexes and LIS, so we have to do it separately. + if (Indexes) + Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I); + + if (LIS) + for (const CalleeSavedInfo &CS : CSI) + LIS->removeAllRegUnitsForPhysReg(CS.getReg()); } /// Insert restore code for the callee-saved registers used in the function. 
@@ -268,11 +230,19 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( std::vector CSI; const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + Register RetAddrReg = TRI->getReturnAddressReg(MF); + bool SpillRetAddrReg = false; for (unsigned I = 0; CSRegs[I]; ++I) { MCRegister Reg = CSRegs[I]; if (SavedRegs.test(Reg)) { + if (Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub0) || + Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub1)) { + SpillRetAddrReg = true; + continue; + } + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, MVT::i32); int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), @@ -283,9 +253,21 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( } } + // Return address uses a register pair. Add the super register to the + // CSI list so that it's easier to identify the entire spill and CFI + // can be emitted appropriately. + if (SpillRetAddrReg) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(RetAddrReg, MVT::i64); + int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC), true); + CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI)); + CalleeSavedFIs.push_back(JunkFI); + } + if (!CSI.empty()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) - insertCSRSaves(*SaveBlock, CSI, Indexes, LIS); + insertCSRSaves(ST, *SaveBlock, CSI, Indexes, LIS); // Add live ins to save blocks. 
assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ebd2e7ecf249e..77608a4cfc751 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1128,6 +1128,7 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, unsigned Op = MI.getOpcode(); switch (Op) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: // FIXME: This assumes the mask is statically known and not computed at // runtime. However, some ABIs may want to compute the mask dynamically and @@ -1135,21 +1136,29 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, return llvm::popcount( (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm()); case AMDGPU::SI_SPILL_S1024_SAVE: + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_V1024_SAVE: + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_SPILL_V1024_RESTORE: case AMDGPU::SI_SPILL_A1024_SAVE: + case AMDGPU::SI_SPILL_A1024_CFI_SAVE: case AMDGPU::SI_SPILL_A1024_RESTORE: case AMDGPU::SI_SPILL_AV1024_SAVE: + case AMDGPU::SI_SPILL_AV1024_CFI_SAVE: case AMDGPU::SI_SPILL_AV1024_RESTORE: return 32; case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_V512_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_A512_SAVE: + case AMDGPU::SI_SPILL_A512_CFI_SAVE: case AMDGPU::SI_SPILL_A512_RESTORE: case AMDGPU::SI_SPILL_AV512_SAVE: + case AMDGPU::SI_SPILL_AV512_CFI_SAVE: case AMDGPU::SI_SPILL_AV512_RESTORE: return 16; case AMDGPU::SI_SPILL_S384_SAVE: @@ -1189,75 +1198,107 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, case AMDGPU::SI_SPILL_AV288_RESTORE: return 9; case 
AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_V256_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_A256_SAVE: + case AMDGPU::SI_SPILL_A256_CFI_SAVE: case AMDGPU::SI_SPILL_A256_RESTORE: case AMDGPU::SI_SPILL_AV256_SAVE: + case AMDGPU::SI_SPILL_AV256_CFI_SAVE: case AMDGPU::SI_SPILL_AV256_RESTORE: return 8; case AMDGPU::SI_SPILL_S224_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_V224_SAVE: + case AMDGPU::SI_SPILL_V224_CFI_SAVE: case AMDGPU::SI_SPILL_V224_RESTORE: case AMDGPU::SI_SPILL_A224_SAVE: + case AMDGPU::SI_SPILL_A224_CFI_SAVE: case AMDGPU::SI_SPILL_A224_RESTORE: case AMDGPU::SI_SPILL_AV224_SAVE: + case AMDGPU::SI_SPILL_AV224_CFI_SAVE: case AMDGPU::SI_SPILL_AV224_RESTORE: return 7; case AMDGPU::SI_SPILL_S192_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_V192_SAVE: + case AMDGPU::SI_SPILL_V192_CFI_SAVE: case AMDGPU::SI_SPILL_V192_RESTORE: case AMDGPU::SI_SPILL_A192_SAVE: + case AMDGPU::SI_SPILL_A192_CFI_SAVE: case AMDGPU::SI_SPILL_A192_RESTORE: case AMDGPU::SI_SPILL_AV192_SAVE: + case AMDGPU::SI_SPILL_AV192_CFI_SAVE: case AMDGPU::SI_SPILL_AV192_RESTORE: return 6; case AMDGPU::SI_SPILL_S160_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_V160_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: case AMDGPU::SI_SPILL_V160_RESTORE: case AMDGPU::SI_SPILL_A160_SAVE: + case AMDGPU::SI_SPILL_A160_CFI_SAVE: case AMDGPU::SI_SPILL_A160_RESTORE: case AMDGPU::SI_SPILL_AV160_SAVE: + case AMDGPU::SI_SPILL_AV160_CFI_SAVE: case AMDGPU::SI_SPILL_AV160_RESTORE: return 5; case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_V128_SAVE: + case AMDGPU::SI_SPILL_V128_CFI_SAVE: case AMDGPU::SI_SPILL_V128_RESTORE: case 
AMDGPU::SI_SPILL_A128_SAVE: + case AMDGPU::SI_SPILL_A128_CFI_SAVE: case AMDGPU::SI_SPILL_A128_RESTORE: case AMDGPU::SI_SPILL_AV128_SAVE: + case AMDGPU::SI_SPILL_AV128_CFI_SAVE: case AMDGPU::SI_SPILL_AV128_RESTORE: return 4; case AMDGPU::SI_SPILL_S96_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_V96_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_A96_SAVE: + case AMDGPU::SI_SPILL_A96_CFI_SAVE: case AMDGPU::SI_SPILL_A96_RESTORE: case AMDGPU::SI_SPILL_AV96_SAVE: + case AMDGPU::SI_SPILL_AV96_CFI_SAVE: case AMDGPU::SI_SPILL_AV96_RESTORE: return 3; case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_V64_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_A64_SAVE: + case AMDGPU::SI_SPILL_A64_CFI_SAVE: case AMDGPU::SI_SPILL_A64_RESTORE: case AMDGPU::SI_SPILL_AV64_SAVE: + case AMDGPU::SI_SPILL_AV64_CFI_SAVE: case AMDGPU::SI_SPILL_AV64_RESTORE: return 2; case AMDGPU::SI_SPILL_S32_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: case AMDGPU::SI_SPILL_S32_RESTORE: case AMDGPU::SI_SPILL_V32_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_A32_SAVE: + case AMDGPU::SI_SPILL_A32_CFI_SAVE: case AMDGPU::SI_SPILL_A32_RESTORE: case AMDGPU::SI_SPILL_AV32_SAVE: + case AMDGPU::SI_SPILL_AV32_CFI_SAVE: case AMDGPU::SI_SPILL_AV32_RESTORE: case AMDGPU::SI_SPILL_WWM_V32_SAVE: case AMDGPU::SI_SPILL_WWM_V32_RESTORE: @@ -1386,14 +1427,14 @@ static int getOffenMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - int Index, unsigned Lane, - unsigned ValueReg, bool IsKill) { +static MachineInstrBuilder +spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, int Index, unsigned 
Lane, + unsigned ValueReg, bool IsKill, bool NeedsCFI) { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); + const SIFrameLowering *TFL = ST.getFrameLowering(); MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); @@ -1416,6 +1457,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst) .addReg(Src, getKillRegState(IsKill)); CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); + if (NeedsCFI) + TFL->buildCFIForVRegToVRegSpill(MBB, MI, DL, Src, Dst); return CopyMIB; } unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 @@ -1424,6 +1467,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst) .addReg(Src, getKillRegState(IsKill)); MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); + if (NeedsCFI) + TFL->buildCFIForVRegToVRegSpill(MBB, MI, DL, Src, Dst); return MIB; } @@ -1446,7 +1491,8 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, return false; const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); - if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr()) + if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false, false) + .getInstr()) return true; MachineInstrBuilder NewMI = @@ -1511,12 +1557,13 @@ void SIRegisterInfo::buildSpillLoadStore( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, - RegScavenger *RS, LiveRegUnits *LiveUnits) const { + RegScavenger *RS, LiveRegUnits *LiveUnits, bool NeedsCFI) const { assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both"); MachineFunction *MF = MBB.getParent(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = 
MF->getFrameInfo(); + const SIFrameLowering *TFL = ST.getFrameLowering(); const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const MCInstrDesc *Desc = &TII->get(LoadStoreOp); @@ -1548,6 +1595,7 @@ void SIRegisterInfo::buildSpillLoadStore( int64_t MaxOffset = Offset + Size + RemSize - EltSize; int64_t ScratchOffsetRegDelta = 0; + int64_t AdditionalCFIOffset = 0; if (IsFlat && EltSize > 4) { LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize); @@ -1660,6 +1708,7 @@ void SIRegisterInfo::buildSpillLoadStore( Scavenged = true; } + AdditionalCFIOffset = Offset; // We currently only support spilling VGPRs to EltSize boundaries, meaning // we can simplify the adjustment of Offset here to just scale with // WavefrontSize. @@ -1762,7 +1811,8 @@ void SIRegisterInfo::buildSpillLoadStore( Register Sub = IsSubReg ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane))) : ValueReg; - auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill); + auto MIB = + spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill, NeedsCFI); if (!MIB.getInstr()) break; if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) { @@ -1863,6 +1913,18 @@ void SIRegisterInfo::buildSpillLoadStore( MIB.addImm(0); // swz MIB.addMemOperand(NewMMO); + if (IsStore && NeedsCFI) { + if (TII->isBlockLoadStore(LoadStoreOp)) { + assert(RegOffset == 0 && + "expected whole register block to be treated as single element"); + buildCFIForBlockCSRStore(MBB, MI, ValueReg, Offset); + } else { + TFL->buildCFIForVGPRToVMEMSpill( + MBB, MI, DebugLoc(), SubReg, + (Offset + RegOffset) * ST.getWavefrontSize() + AdditionalCFIOffset); + } + } + if (!IsAGPR && NeedSuperRegDef) MIB.addReg(ValueReg, RegState::ImplicitDefine); @@ -1934,6 +1996,31 @@ void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, MIB.addUse(BaseVGPR + RegOffset, RegState::Implicit); } +void SIRegisterInfo::buildCFIForBlockCSRStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator 
MBBI, + Register BlockReg, + int64_t Offset) const { + const MachineFunction *MF = MBB.getParent(); + const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); + uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg); + Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0); + for (unsigned RegOffset = 0; RegOffset < 32; ++RegOffset) { + Register VGPR = BaseVGPR + RegOffset; + if (Mask & (1 << RegOffset)) { + assert(isCalleeSavedPhysReg(VGPR, *MF)); + ST.getFrameLowering()->buildCFIForVGPRToVMEMSpill( + MBB, MBBI, DebugLoc(), VGPR, + (Offset + RegOffset) * ST.getWavefrontSize()); + } else if (isCalleeSavedPhysReg(VGPR, *MF)) { + // FIXME: This is a workaround for the fact that FrameLowering's + // emitPrologueEntryCFI considers the block load to clobber all registers + // in the block. + ST.getFrameLowering()->buildCFIForSameValue(MBB, MBBI, DebugLoc(), + BaseVGPR + RegOffset); + } + } +} + void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill) const { @@ -1970,7 +2057,7 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, LiveIntervals *LIS, bool OnlyToVGPR, - bool SpillToPhysVGPRLane) const { + bool SpillToPhysVGPRLane, bool NeedsCFI) const { assert(!MI->getOperand(0).isUndef() && "undef spill should have been deleted earlier"); @@ -1983,6 +2070,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, if (OnlyToVGPR && !SpillToVGPR) return false; + const SIFrameLowering *TFL = ST.getFrameLowering(); + assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() && SB.SuperReg != SB.MFI.getFrameOffsetReg())); @@ -2015,11 +2104,27 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) .addReg(Spill.VGPR); + + MachineInstr *CFI = nullptr; + if (NeedsCFI) { 
+ if (SB.SuperReg == SB.TRI.getReturnAddressReg(SB.MF)) { + if (i == e - 1) + CFI = TFL->buildCFIForSGPRToVGPRSpill(*SB.MBB, MI, DebugLoc(), + AMDGPU::PC_REG, VGPRSpills); + } else { + CFI = TFL->buildCFIForSGPRToVGPRSpill(*SB.MBB, MI, DebugLoc(), SubReg, + Spill.VGPR, Spill.Lane); + } + } + if (Indexes) { if (IsFirstSubreg) Indexes->replaceMachineInstrInMaps(*MI, *MIB); else Indexes->insertMachineInstrInMaps(*MIB); + + if (CFI) + Indexes->insertMachineInstrInMaps(*CFI); } if (IsFirstSubreg && SB.NumSubRegs > 1) { @@ -2084,6 +2189,18 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, // Write out VGPR SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); + + // TODO: Implement CFI for SpillToVMEM for all scenarios. + MachineInstr *CFI = nullptr; + if (NeedsCFI && SB.SuperReg == SB.TRI.getReturnAddressReg(SB.MF)) { + int64_t CFIOffset = (Offset * SB.EltSize + + SB.MF.getFrameInfo().getObjectOffset(Index)) * + ST.getWavefrontSize(); + CFI = TFL->buildCFIForSGPRToVMEMSpill(*SB.MBB, MI, DebugLoc(), + AMDGPU::PC_REG, CFIOffset); + } + if (Indexes && CFI) + Indexes->insertMachineInstrInMaps(*CFI); } SB.restore(); @@ -2255,7 +2372,20 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const { + bool NeedsCFI = false; switch (MI->getOpcode()) { + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: + NeedsCFI = true; + [[fallthrough]]; case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case 
AMDGPU::SI_SPILL_S384_SAVE: @@ -2270,7 +2400,8 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: - return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); + return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane, + NeedsCFI); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S384_RESTORE: @@ -2313,8 +2444,23 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, ? getBaseRegister() : getFrameRegister(*MF); + bool NeedsCFI = false; + switch (MI->getOpcode()) { // SGPR register spill + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: { + NeedsCFI = true; + [[fallthrough]]; + } case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S384_SAVE: @@ -2329,7 +2475,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: { - return spillSGPR(MI, Index, RS); + return spillSGPR(MI, Index, RS, nullptr, nullptr, false, false, NeedsCFI); } // SGPR register restore @@ -2351,13 +2497,40 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } // VGPR register spill - case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: { - // Put mask into M0. 
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: + case AMDGPU::SI_SPILL_V224_CFI_SAVE: + case AMDGPU::SI_SPILL_V192_CFI_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: + case AMDGPU::SI_SPILL_V128_CFI_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: + case AMDGPU::SI_SPILL_A1024_CFI_SAVE: + case AMDGPU::SI_SPILL_A512_CFI_SAVE: + case AMDGPU::SI_SPILL_A256_CFI_SAVE: + case AMDGPU::SI_SPILL_A224_CFI_SAVE: + case AMDGPU::SI_SPILL_A192_CFI_SAVE: + case AMDGPU::SI_SPILL_A160_CFI_SAVE: + case AMDGPU::SI_SPILL_A128_CFI_SAVE: + case AMDGPU::SI_SPILL_A96_CFI_SAVE: + case AMDGPU::SI_SPILL_A64_CFI_SAVE: + case AMDGPU::SI_SPILL_A32_CFI_SAVE: + case AMDGPU::SI_SPILL_AV1024_CFI_SAVE: + case AMDGPU::SI_SPILL_AV512_CFI_SAVE: + case AMDGPU::SI_SPILL_AV256_CFI_SAVE: + case AMDGPU::SI_SPILL_AV224_CFI_SAVE: + case AMDGPU::SI_SPILL_AV192_CFI_SAVE: + case AMDGPU::SI_SPILL_AV160_CFI_SAVE: + case AMDGPU::SI_SPILL_AV128_CFI_SAVE: + case AMDGPU::SI_SPILL_AV96_CFI_SAVE: + case AMDGPU::SI_SPILL_AV64_CFI_SAVE: + case AMDGPU::SI_SPILL_AV32_CFI_SAVE: + NeedsCFI = true; [[fallthrough]]; - } + case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V384_SAVE: @@ -2403,6 +2576,16 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_AV32_SAVE: case AMDGPU::SI_SPILL_WWM_V32_SAVE: case AMDGPU::SI_SPILL_WWM_AV32_SAVE: { + assert( + MI->getOpcode() != AMDGPU::SI_BLOCK_SPILL_V1024_SAVE && + "block spill does not currenty support spilling non-CSR registers"); + + if (MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE) + // Put mask into M0. 
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); + const MachineOperand *VData = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); if (VData->isUndef()) { @@ -2418,7 +2601,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16; } else { - Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE + Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -2428,12 +2611,12 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); if (IsWWMRegSpill) { TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), - RS->isRegUsed(AMDGPU::SCC)); + RS->isRegUsed(AMDGPU::SCC)); } buildSpillLoadStore( *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), - *MI->memoperands_begin(), RS); + *MI->memoperands_begin(), RS, nullptr, NeedsCFI); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII)); if (IsWWMRegSpill) TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 7b91ba7bc581f..2dae5f0eb1c69 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -121,6 +121,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const; + // Iterate over all VGPRs in the given BlockReg and emit CFI for each VGPR + // as-needed depending on the (statically known) mask, relative to the given + // base Offset. 
+ void buildCFIForBlockCSRStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register BlockReg, int64_t Offset) const; + const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; @@ -176,8 +183,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { /// free VGPR lane to spill. bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false, - bool SpillToPhysVGPRLane = false) const; + bool OnlyToVGPR = false, bool SpillToPhysVGPRLane = false, + bool NeedsCFI = false) const; bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, @@ -459,8 +466,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, - RegScavenger *RS, - LiveRegUnits *LiveUnits = nullptr) const; + RegScavenger *RS, LiveRegUnits *LiveUnits = nullptr, + bool NeedsCFI = false) const; // Return alignment in register file of first register in a register tuple. 
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll index b84b31cd2702c..2a5c8be7a987a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll @@ -13,20 +13,20 @@ define ptr addrspace(1) @call_assert_align() { ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: global_store_dword v[0:1], v2, off ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 -; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index e3228162be22a..3e3e788b2f31d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -223,23 +223,23 @@ define void @func_caller_stack() { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: s_addk_i32 s32, 
0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 9 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; MUBUF-NEXT: v_mov_b32_e32 v0, 10 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; MUBUF-NEXT: v_mov_b32_e32 v0, 11 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; MUBUF-NEXT: v_mov_b32_e32 v0, 12 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -258,7 +258,9 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_u32 s0, s32, 4 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 9 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 @@ -270,15 +272,13 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_add_u32 s0, s32, 16 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; 
FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -301,14 +301,14 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen ; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_waitcnt vmcnt(1) ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; MUBUF-NEXT: s_waitcnt vmcnt(1) @@ -363,8 +363,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-NEXT: s_waitcnt vmcnt(1) ; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -383,13 +383,13 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 -; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: 
v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_byval@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 ; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:8 @@ -414,8 +414,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index c100d653c1cd7..e058a3e5c332e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -236,16 +236,16 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s16, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], 0 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll index 003aa049b2d1b..324d853145924 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll @@ -337,7 +337,6 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-LABEL: flat_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -354,6 +353,7 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -464,7 +464,6 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-LABEL: flat_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -481,6 +480,7 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; 
GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] @@ -4006,7 +4006,6 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-LABEL: flat_atomic_xor_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -4023,6 +4022,7 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -4131,7 +4131,6 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-LABEL: flat_atomic_xor_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -4148,6 +4147,7 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll index 34a4899123749..e67d5b0fad14a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll @@ -337,7 +337,6 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-LABEL: global_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -354,6 +353,7 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -464,7 +464,6 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-LABEL: global_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -481,6 +480,7 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] @@ -2985,7 +2985,6 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-LABEL: 
global_atomic_xor_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -3002,6 +3001,7 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -3110,7 +3110,6 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-LABEL: global_atomic_xor_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -3127,6 +3126,7 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index 3194581fa4213..2d7cfcea04124 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -23,15 +23,15 @@ define void 
@parent_func_missing_inputs() #0 { ; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; FIXEDABI-NEXT: s_mov_b64 exec, s[18:19] ; FIXEDABI-NEXT: v_writelane_b32 v40, s16, 2 -; FIXEDABI-NEXT: s_addk_i32 s32, 0x400 ; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0 +; FIXEDABI-NEXT: s_addk_i32 s32, 0x400 +; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 ; FIXEDABI-NEXT: s_getpc_b64 s[16:17] ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 -; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 ; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 ; FIXEDABI-NEXT: s_mov_b32 s32, s33 ; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2 ; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index 4ff3f5c13d42a..23cae4b6a6baa 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -514,229 +514,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 
0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 
32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit 
$exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit 
$exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit 
$exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit 
$exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: 
(store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, 
$exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) 
into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -1473,229 +1697,453 @@ body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask 
$agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, 
$exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; 
GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; 
GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, 
$exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 
; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) 
into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) 
into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 
29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, 
$exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -2462,229 +2910,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, 
implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit 
$exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, 
implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed 
$agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 
64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit 
$exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, 
addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into 
%stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into 
%stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into 
%stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into 
%stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, 
$exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 
implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -3424,229 +4096,453 @@ 
body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 
32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 
64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, 
$exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -4415,229 +5311,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; 
GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) 
into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, 
$exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit 
$exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit 
$exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit 
$exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit 
$exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 
0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -5380,229 +6500,453 @@ body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 
64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, 
$exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 
32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 
; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -6367,229 +7711,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: 
(store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 
64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit 
$exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit 
$exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit 
$exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit 
$exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: 
(store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, 
$exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -7326,229 +8894,453 @@ body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 
killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 
600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, 
$sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, 
$sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 
64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, 
$exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 
32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr 
:: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 
4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -8314,229 +10106,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 
killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 
killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = 
V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 
= V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 
64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit 
$exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, 
addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into 
%stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into 
%stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into 
%stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, 
addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 
64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit 
$exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -9276,229 +11292,453 @@ body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, 
$exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: 
$vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = 
V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = 
V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = 
V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 
64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, 
$exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 
32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 
; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -10265,229 +12505,453 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, 
$exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, 
$exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 
32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, 
$vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: 
(store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, 
$exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit 
$exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit 
$exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit 
$exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit 
$exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: 
(store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, 
$exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -11230,229 +13694,453 @@ body: | ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 
64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, 
$exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 
64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index ebbeab94066d6..a21db73cf3714 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -451,6 +451,7 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX90A-LABEL: v32_asm_def_use: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_accvgpr_read_b32 v35, a32 ; Reload Reuse ; GFX90A-NEXT: v_mov_b32_e32 v34, v0 ; GFX90A-NEXT: v_mov_b32_e32 v33, v1 ; GFX90A-NEXT: ;;#ASMSTART @@ -478,8 +479,8 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; copy ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v35, a32 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_mov_b32 a32, a1 +; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; copy @@ -1056,6 +1057,7 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX90A-LABEL: no_free_vgprs_at_sgpr_to_agpr_copy: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_accvgpr_read_b32 v34, a32 ; Reload Reuse ; GFX90A-NEXT: v_mov_b32_e32 v33, v0 ; GFX90A-NEXT: v_mov_b32_e32 v32, v1 ; GFX90A-NEXT: ;;#ASMSTART @@ -1077,8 +1079,7 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a18, s2 ; GFX90A-NEXT: v_accvgpr_write_b32 a17, s1 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, s0 -; GFX90A-NEXT: v_accvgpr_read_b32 v34, a32 ; Reload Reuse -; 
GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: s_nop 1 ; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX90A-NEXT: s_nop 10 ; GFX90A-NEXT: buffer_store_dword a0, off, s[0:3], s32 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll index eb6482401f764..5943fdc10c14d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll @@ -19,24 +19,25 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 ; DAGISEL-NEXT: v_writelane_b32 v42, s0, 2 +; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 ; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 +; DAGISEL-NEXT: v_writelane_b32 v42, s31, 1 ; DAGISEL-NEXT: v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1 ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 ; DAGISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo -; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 -; DAGISEL-NEXT: v_writelane_b32 v42, s31, 1 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL-NEXT: global_store_b32 v[40:41], v0, off ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_load_b32 v41, off, s33 ; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1 ; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0 +; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1 ; DAGISEL-NEXT: s_mov_b32 s32, s33 ; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2 ; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -62,24 +63,25 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_mov_b32 exec_lo, s1 ; GISEL-NEXT: 
v_writelane_b32 v42, s0, 2 +; GISEL-NEXT: s_add_co_i32 s32, s32, 16 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v41, s33 ; GISEL-NEXT: v_writelane_b32 v42, s30, 0 +; GISEL-NEXT: v_writelane_b32 v42, s31, 1 ; GISEL-NEXT: v_dual_mov_b32 v40, v1 :: v_dual_mov_b32 v41, v2 ; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 ; GISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi -; GISEL-NEXT: s_add_co_i32 s32, s32, 16 -; GISEL-NEXT: v_writelane_b32 v42, s31, 1 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL-NEXT: global_store_b32 v[40:41], v0, off ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_load_b32 v41, off, s33 ; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GISEL-NEXT: v_readlane_b32 s31, v42, 1 ; GISEL-NEXT: v_readlane_b32 s30, v42, 0 +; GISEL-NEXT: v_readlane_b32 s31, v42, 1 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s0, v42, 2 ; GISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -1058,15 +1060,14 @@ define amdgpu_gfx void @ret_void(i32 %x) { ; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 2 ; DAGISEL-NEXT: v_writelane_b32 v40, s30, 0 -; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo ; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 ; DAGISEL-NEXT: v_writelane_b32 v40, s31, 1 +; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1 ; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0 +; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1 ; DAGISEL-NEXT: s_mov_b32 s32, s33 ; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2 ; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -1093,15 +1094,14 @@ define amdgpu_gfx 
void @ret_void(i32 %x) { ; GISEL-NEXT: s_mov_b32 exec_lo, s1 ; GISEL-NEXT: v_writelane_b32 v40, s0, 2 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi ; GISEL-NEXT: s_add_co_i32 s32, s32, 16 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s0, v40, 2 ; GISEL-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index d821801677d79..7aa648f674f35 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -6757,24 +6757,43 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -7466,42 +7485,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 
-; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 
+; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s98, 34 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -7521,7 +7541,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB13_4 @@ -8391,6 +8410,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v22, 11 ; SI-NEXT: v_readlane_b32 s17, v22, 17 ; SI-NEXT: v_readlane_b32 s15, v22, 23 @@ -8398,42 +8418,41 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readlane_b32 s11, v22, 35 ; SI-NEXT: v_readlane_b32 s9, v22, 41 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: 
v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: 
v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -8630,38 +8649,39 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, 
s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -8681,7 +8701,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: 
v_readfirstlane_b32 s5, v18 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -9470,43 +9489,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: v_readlane_b32 s15, v21, 1 ; VI-NEXT: v_readlane_b32 s13, v21, 3 ; VI-NEXT: v_readlane_b32 s11, v21, 5 ; VI-NEXT: v_readlane_b32 s9, v21, 7 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 -; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: 
v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -9679,42 +9698,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_writelane_b32 v20, s31, 1 -; GFX9-NEXT: v_writelane_b32 v20, s34, 2 -; GFX9-NEXT: v_writelane_b32 v20, s35, 3 -; GFX9-NEXT: v_writelane_b32 v20, s36, 4 -; GFX9-NEXT: v_writelane_b32 v20, s37, 5 -; GFX9-NEXT: v_writelane_b32 v20, s38, 6 -; GFX9-NEXT: 
v_writelane_b32 v20, s39, 7 -; GFX9-NEXT: v_writelane_b32 v20, s48, 8 -; GFX9-NEXT: v_writelane_b32 v20, s49, 9 -; GFX9-NEXT: v_writelane_b32 v20, s50, 10 -; GFX9-NEXT: v_writelane_b32 v20, s51, 11 -; GFX9-NEXT: v_writelane_b32 v20, s52, 12 -; GFX9-NEXT: v_writelane_b32 v20, s53, 13 -; GFX9-NEXT: v_writelane_b32 v20, s54, 14 -; GFX9-NEXT: v_writelane_b32 v20, s55, 15 -; GFX9-NEXT: v_writelane_b32 v20, s64, 16 -; GFX9-NEXT: v_writelane_b32 v20, s65, 17 -; GFX9-NEXT: v_writelane_b32 v20, s66, 18 -; GFX9-NEXT: v_writelane_b32 v20, s67, 19 -; GFX9-NEXT: v_writelane_b32 v20, s68, 20 -; GFX9-NEXT: v_writelane_b32 v20, s69, 21 -; GFX9-NEXT: v_writelane_b32 v20, s70, 22 -; GFX9-NEXT: v_writelane_b32 v20, s71, 23 -; GFX9-NEXT: v_writelane_b32 v20, s80, 24 -; GFX9-NEXT: v_writelane_b32 v20, s81, 25 -; GFX9-NEXT: v_writelane_b32 v20, s82, 26 -; GFX9-NEXT: v_writelane_b32 v20, s83, 27 -; GFX9-NEXT: v_writelane_b32 v20, s84, 28 -; GFX9-NEXT: v_writelane_b32 v20, s85, 29 -; GFX9-NEXT: v_writelane_b32 v20, s86, 30 -; GFX9-NEXT: v_writelane_b32 v20, s87, 31 -; GFX9-NEXT: v_writelane_b32 v20, s96, 32 -; GFX9-NEXT: v_writelane_b32 v20, s97, 33 +; GFX9-NEXT: v_writelane_b32 v20, s34, 0 +; GFX9-NEXT: v_writelane_b32 v20, s35, 1 +; GFX9-NEXT: v_writelane_b32 v20, s36, 2 +; GFX9-NEXT: v_writelane_b32 v20, s37, 3 +; GFX9-NEXT: v_writelane_b32 v20, s38, 4 +; GFX9-NEXT: v_writelane_b32 v20, s39, 5 +; GFX9-NEXT: v_writelane_b32 v20, s48, 6 +; GFX9-NEXT: v_writelane_b32 v20, s49, 7 +; GFX9-NEXT: v_writelane_b32 v20, s50, 8 +; GFX9-NEXT: v_writelane_b32 v20, s51, 9 +; GFX9-NEXT: v_writelane_b32 v20, s52, 10 +; GFX9-NEXT: v_writelane_b32 v20, s53, 11 +; GFX9-NEXT: v_writelane_b32 v20, s54, 12 +; GFX9-NEXT: v_writelane_b32 v20, s55, 13 +; GFX9-NEXT: v_writelane_b32 v20, s64, 14 +; GFX9-NEXT: v_writelane_b32 v20, s65, 15 +; GFX9-NEXT: v_writelane_b32 v20, s66, 16 +; GFX9-NEXT: v_writelane_b32 v20, s67, 17 +; GFX9-NEXT: v_writelane_b32 v20, s68, 18 +; GFX9-NEXT: v_writelane_b32 v20, s69, 19 +; 
GFX9-NEXT: v_writelane_b32 v20, s70, 20 +; GFX9-NEXT: v_writelane_b32 v20, s71, 21 +; GFX9-NEXT: v_writelane_b32 v20, s80, 22 +; GFX9-NEXT: v_writelane_b32 v20, s81, 23 +; GFX9-NEXT: v_writelane_b32 v20, s82, 24 +; GFX9-NEXT: v_writelane_b32 v20, s83, 25 +; GFX9-NEXT: v_writelane_b32 v20, s84, 26 +; GFX9-NEXT: v_writelane_b32 v20, s85, 27 +; GFX9-NEXT: v_writelane_b32 v20, s86, 28 +; GFX9-NEXT: v_writelane_b32 v20, s87, 29 +; GFX9-NEXT: v_writelane_b32 v20, s96, 30 +; GFX9-NEXT: v_writelane_b32 v20, s97, 31 +; GFX9-NEXT: v_writelane_b32 v20, s98, 32 +; GFX9-NEXT: v_writelane_b32 v20, s99, 33 +; GFX9-NEXT: v_writelane_b32 v20, s30, 34 +; GFX9-NEXT: v_writelane_b32 v20, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v20, s98, 34 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -9734,7 +9754,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: v_writelane_b32 v20, s99, 35 ; GFX9-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB13_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -10468,44 +10487,44 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:120 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v20, 34 ; GFX9-NEXT: v_readlane_b32 s9, v21, 1 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:124 -; GFX9-NEXT: v_readlane_b32 s99, v20, 35 -; GFX9-NEXT: v_readlane_b32 s98, v20, 34 -; GFX9-NEXT: v_readlane_b32 s97, v20, 33 -; GFX9-NEXT: v_readlane_b32 s96, v20, 32 -; GFX9-NEXT: v_readlane_b32 s87, v20, 31 -; GFX9-NEXT: v_readlane_b32 s86, v20, 30 -; GFX9-NEXT: v_readlane_b32 s85, v20, 29 -; GFX9-NEXT: 
v_readlane_b32 s84, v20, 28 -; GFX9-NEXT: v_readlane_b32 s83, v20, 27 -; GFX9-NEXT: v_readlane_b32 s82, v20, 26 -; GFX9-NEXT: v_readlane_b32 s81, v20, 25 -; GFX9-NEXT: v_readlane_b32 s80, v20, 24 -; GFX9-NEXT: v_readlane_b32 s71, v20, 23 -; GFX9-NEXT: v_readlane_b32 s70, v20, 22 -; GFX9-NEXT: v_readlane_b32 s69, v20, 21 -; GFX9-NEXT: v_readlane_b32 s68, v20, 20 -; GFX9-NEXT: v_readlane_b32 s67, v20, 19 -; GFX9-NEXT: v_readlane_b32 s66, v20, 18 -; GFX9-NEXT: v_readlane_b32 s65, v20, 17 -; GFX9-NEXT: v_readlane_b32 s64, v20, 16 -; GFX9-NEXT: v_readlane_b32 s55, v20, 15 -; GFX9-NEXT: v_readlane_b32 s54, v20, 14 -; GFX9-NEXT: v_readlane_b32 s53, v20, 13 -; GFX9-NEXT: v_readlane_b32 s52, v20, 12 -; GFX9-NEXT: v_readlane_b32 s51, v20, 11 -; GFX9-NEXT: v_readlane_b32 s50, v20, 10 -; GFX9-NEXT: v_readlane_b32 s49, v20, 9 -; GFX9-NEXT: v_readlane_b32 s48, v20, 8 -; GFX9-NEXT: v_readlane_b32 s39, v20, 7 -; GFX9-NEXT: v_readlane_b32 s38, v20, 6 -; GFX9-NEXT: v_readlane_b32 s37, v20, 5 -; GFX9-NEXT: v_readlane_b32 s36, v20, 4 -; GFX9-NEXT: v_readlane_b32 s35, v20, 3 -; GFX9-NEXT: v_readlane_b32 s34, v20, 2 -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 -; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 35 +; GFX9-NEXT: v_readlane_b32 s99, v20, 33 +; GFX9-NEXT: v_readlane_b32 s98, v20, 32 +; GFX9-NEXT: v_readlane_b32 s97, v20, 31 +; GFX9-NEXT: v_readlane_b32 s96, v20, 30 +; GFX9-NEXT: v_readlane_b32 s87, v20, 29 +; GFX9-NEXT: v_readlane_b32 s86, v20, 28 +; GFX9-NEXT: v_readlane_b32 s85, v20, 27 +; GFX9-NEXT: v_readlane_b32 s84, v20, 26 +; GFX9-NEXT: v_readlane_b32 s83, v20, 25 +; GFX9-NEXT: v_readlane_b32 s82, v20, 24 +; GFX9-NEXT: v_readlane_b32 s81, v20, 23 +; GFX9-NEXT: v_readlane_b32 s80, v20, 22 +; GFX9-NEXT: v_readlane_b32 s71, v20, 21 +; GFX9-NEXT: v_readlane_b32 s70, v20, 20 +; GFX9-NEXT: v_readlane_b32 s69, v20, 19 +; GFX9-NEXT: v_readlane_b32 s68, v20, 18 +; GFX9-NEXT: v_readlane_b32 s67, v20, 17 +; GFX9-NEXT: v_readlane_b32 s66, v20, 16 +; 
GFX9-NEXT: v_readlane_b32 s65, v20, 15 +; GFX9-NEXT: v_readlane_b32 s64, v20, 14 +; GFX9-NEXT: v_readlane_b32 s55, v20, 13 +; GFX9-NEXT: v_readlane_b32 s54, v20, 12 +; GFX9-NEXT: v_readlane_b32 s53, v20, 11 +; GFX9-NEXT: v_readlane_b32 s52, v20, 10 +; GFX9-NEXT: v_readlane_b32 s51, v20, 9 +; GFX9-NEXT: v_readlane_b32 s50, v20, 8 +; GFX9-NEXT: v_readlane_b32 s49, v20, 7 +; GFX9-NEXT: v_readlane_b32 s48, v20, 6 +; GFX9-NEXT: v_readlane_b32 s39, v20, 5 +; GFX9-NEXT: v_readlane_b32 s38, v20, 4 +; GFX9-NEXT: v_readlane_b32 s37, v20, 3 +; GFX9-NEXT: v_readlane_b32 s36, v20, 2 +; GFX9-NEXT: v_readlane_b32 s35, v20, 1 +; GFX9-NEXT: v_readlane_b32 s34, v20, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -10675,66 +10694,66 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v16, s30, 0 -; GFX11-NEXT: v_writelane_b32 v17, s96, 0 +; GFX11-NEXT: v_writelane_b32 v16, s34, 0 +; GFX11-NEXT: v_writelane_b32 v16, s35, 1 +; GFX11-NEXT: v_writelane_b32 v16, s36, 2 +; GFX11-NEXT: v_writelane_b32 v16, s37, 3 +; GFX11-NEXT: v_writelane_b32 v16, s38, 4 +; GFX11-NEXT: v_writelane_b32 v16, s39, 5 +; GFX11-NEXT: v_writelane_b32 v16, s48, 6 +; GFX11-NEXT: v_writelane_b32 v16, s49, 7 +; GFX11-NEXT: v_writelane_b32 v16, s50, 8 +; GFX11-NEXT: v_writelane_b32 v16, s51, 9 +; GFX11-NEXT: v_writelane_b32 v16, s52, 10 +; GFX11-NEXT: v_writelane_b32 v16, s53, 11 +; GFX11-NEXT: v_writelane_b32 v16, s54, 12 +; GFX11-NEXT: v_writelane_b32 v16, s55, 13 +; GFX11-NEXT: v_writelane_b32 v16, s64, 14 +; GFX11-NEXT: v_writelane_b32 v16, s65, 15 +; GFX11-NEXT: v_writelane_b32 v16, s66, 16 +; GFX11-NEXT: v_writelane_b32 v16, s67, 17 +; GFX11-NEXT: v_writelane_b32 v16, 
s68, 18 +; GFX11-NEXT: v_writelane_b32 v16, s69, 19 +; GFX11-NEXT: v_writelane_b32 v16, s70, 20 +; GFX11-NEXT: v_writelane_b32 v16, s71, 21 +; GFX11-NEXT: v_writelane_b32 v16, s80, 22 +; GFX11-NEXT: v_writelane_b32 v16, s81, 23 +; GFX11-NEXT: v_writelane_b32 v16, s82, 24 +; GFX11-NEXT: v_writelane_b32 v16, s83, 25 +; GFX11-NEXT: v_writelane_b32 v16, s84, 26 +; GFX11-NEXT: v_writelane_b32 v16, s85, 27 +; GFX11-NEXT: v_writelane_b32 v16, s86, 28 +; GFX11-NEXT: v_writelane_b32 v16, s87, 29 +; GFX11-NEXT: v_writelane_b32 v16, s96, 30 +; GFX11-NEXT: v_writelane_b32 v16, s97, 31 +; GFX11-NEXT: v_writelane_b32 v17, s98, 0 +; GFX11-NEXT: v_writelane_b32 v17, s99, 1 +; GFX11-NEXT: v_writelane_b32 v17, s100, 2 +; GFX11-NEXT: v_writelane_b32 v17, s101, 3 +; GFX11-NEXT: v_writelane_b32 v17, s102, 4 +; GFX11-NEXT: v_writelane_b32 v17, s103, 5 +; GFX11-NEXT: v_writelane_b32 v17, s104, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-NEXT: v_readfirstlane_b32 s40, v1 ; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v16, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s97, 1 ; GFX11-NEXT: v_readfirstlane_b32 s14, v3 ; GFX11-NEXT: v_readfirstlane_b32 s15, v4 ; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v16, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s98, 2 ; GFX11-NEXT: v_readfirstlane_b32 s13, v6 ; GFX11-NEXT: v_readfirstlane_b32 s10, v7 ; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v16, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s99, 3 ; GFX11-NEXT: v_readfirstlane_b32 s8, v9 ; GFX11-NEXT: v_readfirstlane_b32 s9, v10 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v16, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s100, 4 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: v_readfirstlane_b32 s4, v13 ; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v16, s37, 5 -; 
GFX11-NEXT: v_writelane_b32 v17, s101, 5 ; GFX11-NEXT: s_mov_b32 s101, 0 ; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr19 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v16, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s102, 6 -; GFX11-NEXT: v_writelane_b32 v16, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s103, 7 -; GFX11-NEXT: v_writelane_b32 v16, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s104, 8 -; GFX11-NEXT: v_writelane_b32 v16, s49, 9 -; GFX11-NEXT: v_writelane_b32 v16, s50, 10 -; GFX11-NEXT: v_writelane_b32 v16, s51, 11 -; GFX11-NEXT: v_writelane_b32 v16, s52, 12 -; GFX11-NEXT: v_writelane_b32 v16, s53, 13 -; GFX11-NEXT: v_writelane_b32 v16, s54, 14 -; GFX11-NEXT: v_writelane_b32 v16, s55, 15 -; GFX11-NEXT: v_writelane_b32 v16, s64, 16 -; GFX11-NEXT: v_writelane_b32 v16, s65, 17 -; GFX11-NEXT: v_writelane_b32 v16, s66, 18 -; GFX11-NEXT: v_writelane_b32 v16, s67, 19 -; GFX11-NEXT: v_writelane_b32 v16, s68, 20 -; GFX11-NEXT: v_writelane_b32 v16, s69, 21 -; GFX11-NEXT: v_writelane_b32 v16, s70, 22 -; GFX11-NEXT: v_writelane_b32 v16, s71, 23 -; GFX11-NEXT: v_writelane_b32 v16, s80, 24 -; GFX11-NEXT: v_writelane_b32 v16, s81, 25 -; GFX11-NEXT: v_writelane_b32 v16, s82, 26 -; GFX11-NEXT: v_writelane_b32 v16, s83, 27 -; GFX11-NEXT: v_writelane_b32 v16, s84, 28 -; GFX11-NEXT: v_writelane_b32 v16, s85, 29 -; GFX11-NEXT: v_writelane_b32 v16, s86, 30 -; GFX11-NEXT: v_writelane_b32 v16, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s25, 8 @@ -11261,9 +11280,9 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff ; GFX11-NEXT: v_readlane_b32 s17, v19, 29 ; GFX11-NEXT: s_and_b32 s16, s16, 0xff -; GFX11-NEXT: v_readlane_b32 s100, v17, 4 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: 
v_readlane_b32 s99, v17, 3 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 ; GFX11-NEXT: s_or_b32 s2, s2, s3 ; GFX11-NEXT: s_and_b32 s3, s18, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 16 @@ -11297,7 +11316,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s16, s16, s17 ; GFX11-NEXT: v_readlane_b32 s17, v19, 21 ; GFX11-NEXT: s_lshl_b32 s16, s16, 16 -; GFX11-NEXT: v_readlane_b32 s98, v17, 2 +; GFX11-NEXT: v_readlane_b32 s100, v17, 2 ; GFX11-NEXT: s_or_b32 s3, s3, s16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v7, s2 :: v_dual_mov_b32 v8, s3 @@ -11317,12 +11336,12 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s0, s0, s1 ; GFX11-NEXT: s_and_b32 s1, s21, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 8 -; GFX11-NEXT: v_readlane_b32 s86, v16, 30 +; GFX11-NEXT: v_readlane_b32 s99, v17, 1 ; GFX11-NEXT: s_or_b32 s1, s1, s2 ; GFX11-NEXT: v_readlane_b32 s2, v19, 25 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-NEXT: v_readlane_b32 s31, v16, 1 -; GFX11-NEXT: v_readlane_b32 s30, v16, 0 +; GFX11-NEXT: v_readlane_b32 s98, v17, 0 +; GFX11-NEXT: v_readlane_b32 s86, v16, 28 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s2, s3 @@ -11359,9 +11378,9 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_lshl_b32 s3, s88, 8 ; GFX11-NEXT: s_and_b32 s16, s69, 0xff ; GFX11-NEXT: s_and_b32 s18, s72, 0xff -; GFX11-NEXT: v_readlane_b32 s97, v17, 1 +; GFX11-NEXT: v_readlane_b32 s97, v16, 31 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s69, v16, 21 +; GFX11-NEXT: v_readlane_b32 s69, v16, 19 ; GFX11-NEXT: s_or_b32 s1, s2, s3 ; GFX11-NEXT: v_readlane_b32 s3, v19, 16 ; GFX11-NEXT: s_and_b32 s2, s25, 0xff @@ -11568,39 +11587,39 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 
x i32> inreg %a, i3 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:112 -; GFX11-NEXT: v_readlane_b32 s104, v17, 8 -; GFX11-NEXT: v_readlane_b32 s103, v17, 7 -; GFX11-NEXT: v_readlane_b32 s102, v17, 6 -; GFX11-NEXT: v_readlane_b32 s101, v17, 5 -; GFX11-NEXT: v_readlane_b32 s96, v17, 0 -; GFX11-NEXT: v_readlane_b32 s87, v16, 31 -; GFX11-NEXT: v_readlane_b32 s85, v16, 29 -; GFX11-NEXT: v_readlane_b32 s84, v16, 28 -; GFX11-NEXT: v_readlane_b32 s83, v16, 27 -; GFX11-NEXT: v_readlane_b32 s82, v16, 26 -; GFX11-NEXT: v_readlane_b32 s81, v16, 25 -; GFX11-NEXT: v_readlane_b32 s80, v16, 24 -; GFX11-NEXT: v_readlane_b32 s71, v16, 23 -; GFX11-NEXT: v_readlane_b32 s70, v16, 22 -; GFX11-NEXT: v_readlane_b32 s68, v16, 20 -; GFX11-NEXT: v_readlane_b32 s67, v16, 19 -; GFX11-NEXT: v_readlane_b32 s66, v16, 18 -; GFX11-NEXT: v_readlane_b32 s65, v16, 17 -; GFX11-NEXT: v_readlane_b32 s64, v16, 16 -; GFX11-NEXT: v_readlane_b32 s55, v16, 15 -; GFX11-NEXT: v_readlane_b32 s54, v16, 14 -; GFX11-NEXT: v_readlane_b32 s53, v16, 13 -; GFX11-NEXT: v_readlane_b32 s52, v16, 12 -; GFX11-NEXT: v_readlane_b32 s51, v16, 11 -; GFX11-NEXT: v_readlane_b32 s50, v16, 10 -; GFX11-NEXT: v_readlane_b32 s49, v16, 9 -; GFX11-NEXT: v_readlane_b32 s48, v16, 8 -; GFX11-NEXT: v_readlane_b32 s39, v16, 7 -; GFX11-NEXT: v_readlane_b32 s38, v16, 6 -; GFX11-NEXT: v_readlane_b32 s37, v16, 5 -; GFX11-NEXT: v_readlane_b32 s36, v16, 4 -; GFX11-NEXT: v_readlane_b32 s35, v16, 3 -; GFX11-NEXT: v_readlane_b32 s34, v16, 2 +; GFX11-NEXT: v_readlane_b32 s104, v17, 6 +; GFX11-NEXT: v_readlane_b32 s103, v17, 5 +; GFX11-NEXT: v_readlane_b32 s102, v17, 4 +; GFX11-NEXT: v_readlane_b32 s101, v17, 3 +; GFX11-NEXT: v_readlane_b32 s96, v16, 30 +; GFX11-NEXT: v_readlane_b32 s87, v16, 29 +; GFX11-NEXT: v_readlane_b32 s85, v16, 27 +; GFX11-NEXT: v_readlane_b32 s84, v16, 26 +; GFX11-NEXT: v_readlane_b32 s83, v16, 25 +; 
GFX11-NEXT: v_readlane_b32 s82, v16, 24 +; GFX11-NEXT: v_readlane_b32 s81, v16, 23 +; GFX11-NEXT: v_readlane_b32 s80, v16, 22 +; GFX11-NEXT: v_readlane_b32 s71, v16, 21 +; GFX11-NEXT: v_readlane_b32 s70, v16, 20 +; GFX11-NEXT: v_readlane_b32 s68, v16, 18 +; GFX11-NEXT: v_readlane_b32 s67, v16, 17 +; GFX11-NEXT: v_readlane_b32 s66, v16, 16 +; GFX11-NEXT: v_readlane_b32 s65, v16, 15 +; GFX11-NEXT: v_readlane_b32 s64, v16, 14 +; GFX11-NEXT: v_readlane_b32 s55, v16, 13 +; GFX11-NEXT: v_readlane_b32 s54, v16, 12 +; GFX11-NEXT: v_readlane_b32 s53, v16, 11 +; GFX11-NEXT: v_readlane_b32 s52, v16, 10 +; GFX11-NEXT: v_readlane_b32 s51, v16, 9 +; GFX11-NEXT: v_readlane_b32 s50, v16, 8 +; GFX11-NEXT: v_readlane_b32 s49, v16, 7 +; GFX11-NEXT: v_readlane_b32 s48, v16, 6 +; GFX11-NEXT: v_readlane_b32 s39, v16, 5 +; GFX11-NEXT: v_readlane_b32 s38, v16, 4 +; GFX11-NEXT: v_readlane_b32 s37, v16, 3 +; GFX11-NEXT: v_readlane_b32 s36, v16, 2 +; GFX11-NEXT: v_readlane_b32 s35, v16, 1 +; GFX11-NEXT: v_readlane_b32 s34, v16, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v16, off, s32 @@ -16367,56 +16386,105 @@ define <32 x i32> @bitcast_v128i8_to_v32i32(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, 
v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, 
s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, 
v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -20688,45 +20756,83 @@ define inreg <32 x i32> @bitcast_v128i8_to_v32i32_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, 
s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 
offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -21629,45 +21735,83 @@ define inreg <32 x i32> @bitcast_v128i8_to_v32i32_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 
+; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -23491,43 +23635,43 @@ define inreg <64 x bfloat> @bitcast_v32i32_to_v64bf16_scalar(<32 x i32> inreg %a ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: 
v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s70, v1 ; SI-NEXT: v_readfirstlane_b32 s71, v2 ; SI-NEXT: v_readfirstlane_b32 s80, v3 @@ -24047,43 +24191,43 @@ define inreg <64 x bfloat> @bitcast_v32i32_to_v64bf16_scalar(<32 x i32> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s4 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 
s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 
8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -26134,20 +26278,35 @@ define <32 x i32> @bitcast_v64bf16_to_v32i32(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -29183,81 +29342,149 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v32i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -30157,83 +30384,153 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32i32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -32004,12 +32301,26 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte 
Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s30, 2 +; SI-NEXT: v_writelane_b32 v63, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s35, 3 ; SI-NEXT: v_readfirstlane_b32 s47, v1 ; SI-NEXT: v_readfirstlane_b32 s46, v2 ; SI-NEXT: v_readfirstlane_b32 s45, v3 @@ -32029,21 +32340,6 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB21_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -32285,15 +32581,15 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s35, v63, 3 +; SI-NEXT: v_readlane_b32 s30, v63, 2 ; SI-NEXT: v_lshlrev_b32_e32 v61, 16, v61 ; SI-NEXT: v_or_b32_e32 v2, v2, v61 ; SI-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v2, v59 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v0 @@ -34734,84 +35030,155 @@ define inreg <32 x i32> @bitcast_v64f16_to_v32i32_scalar(<64 x half> inreg %a, i ; GFX11-LABEL: bitcast_v64f16_to_v32i32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -35567,28 +35934,29 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: 
v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s30, 20 +; SI-NEXT: v_writelane_b32 v20, s31, 21 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -35608,7 +35976,6 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s69, 21 ; SI-NEXT: s_cbranch_scc0 .LBB25_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 16 @@ -35927,29 +36294,29 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 20 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 
s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 21 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -37784,84 +38151,155 @@ define inreg <32 x i32> @bitcast_v64i16_to_v32i32_scalar(<64 x i16> inreg %a, i3 ; GFX11-LABEL: bitcast_v64i16_to_v32i32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 
offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, 
s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -43671,24 +44109,43 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -44362,44 +44819,56 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: 
v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: 
v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -44419,19 +44888,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB37_3 @@ -45566,42 +46022,42 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; SI-NEXT: v_and_b32_e32 v6, 0xff, v6 ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; 
SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) expcnt(0) ; SI-NEXT: v_lshlrev_b32_e32 v18, 8, v22 ; SI-NEXT: 
v_and_b32_e32 v22, 0xff, v52 @@ -46101,39 +46557,53 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 
offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, 
s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -46153,20 +46623,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB37_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -47021,38 +47477,38 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: v_lshlrev_b32_e32 v23, 8, v50 ; VI-NEXT: v_or_b32_sdwa v23, 
v24, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: v_lshlrev_b32_e32 v24, 8, v36 -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: 
v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_sdwa v24, v25, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v23, v23, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -47372,43 +47828,57 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: 
v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, 
s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -47428,20 +47898,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB37_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -48294,42 +48750,42 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v49 -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, 
v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: 
v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v15, v35, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -48658,86 +49114,104 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s4, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s5, v6 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s10, v11 -; GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; 
GFX11-NEXT: v_readfirstlane_b32 s11, v12 -; GFX11-NEXT: v_readfirstlane_b32 s12, v13 -; GFX11-NEXT: v_readfirstlane_b32 s13, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 vcc_hi, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; 
GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 +; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 
s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s4, v5 +; GFX11-NEXT: v_readfirstlane_b32 s5, v6 +; GFX11-NEXT: v_readfirstlane_b32 s6, v7 +; GFX11-NEXT: v_readfirstlane_b32 s7, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s10, v11 +; GFX11-NEXT: v_readfirstlane_b32 s11, v12 +; GFX11-NEXT: v_readfirstlane_b32 s12, v13 +; GFX11-NEXT: v_readfirstlane_b32 s13, v14 +; GFX11-NEXT: s_mov_b32 vcc_hi, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB37_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; 
GFX11-NEXT: s_lshr_b32 s42, s13, 24 @@ -49627,47 +50101,47 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 -; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: 
v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:76 @@ -54434,56 +54908,105 @@ define <32 x float> 
@bitcast_v128i8_to_v32f32(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -58755,45 +59278,83 @@ define inreg <32 x float> @bitcast_v128i8_to_v32f32_scalar(<128 x i8> inreg %a, ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -59696,45 +60257,83 @@ define inreg <32 x float> @bitcast_v128i8_to_v32f32_scalar(<128 x i8> inreg %a, ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -61541,44 +62140,57 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: 
v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: 
v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s6, v1 ; SI-NEXT: v_readfirstlane_b32 s7, v2 ; SI-NEXT: v_readfirstlane_b32 s8, v3 @@ -61598,20 +62210,6 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: v_readfirstlane_b32 s46, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s47, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 
offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB41_3 ; SI-NEXT: ; %bb.1: ; %cmp.false @@ -62146,42 +62744,42 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 
s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; 
SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 @@ -64247,20 +64845,35 @@ define <32 x float> @bitcast_v64bf16_to_v32f32(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, 
off, s32 offset:4 @@ -67296,81 +67909,149 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v32f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 
offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -68270,83 +68951,153 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32f32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -70098,6 +70849,22 @@ define inreg <64 x half> @bitcast_v32f32_to_v64f16_scalar(<32 x float> inreg %a, ; SI-LABEL: bitcast_v32f32_to_v64f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: v_readfirstlane_b32 s47, v1 ; SI-NEXT: 
v_readfirstlane_b32 s46, v2 @@ -70118,22 +70885,6 @@ define inreg <64 x half> @bitcast_v32f32_to_v64f16_scalar(<32 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -72818,84 +73569,155 @@ define inreg <32 x float> @bitcast_v64f16_to_v32f32_scalar(<64 x half> inreg %a, ; GFX11-LABEL: bitcast_v64f16_to_v32f32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 
0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -73631,22 +74453,6 @@ define inreg <64 x i16> @bitcast_v32f32_to_v64i16_scalar(<32 x float> inreg %a, ; SI-LABEL: bitcast_v32f32_to_v64i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_mov_b32_e32 v31, s16 -; SI-NEXT: v_mov_b32_e32 v32, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; SI-NEXT: v_mov_b32_e32 v27, s20 -; SI-NEXT: v_mov_b32_e32 v28, s21 -; SI-NEXT: v_mov_b32_e32 v25, s22 -; SI-NEXT: v_mov_b32_e32 v26, s23 -; SI-NEXT: v_mov_b32_e32 v23, s24 -; SI-NEXT: v_mov_b32_e32 v24, s25 -; SI-NEXT: v_mov_b32_e32 v21, s26 -; SI-NEXT: v_mov_b32_e32 v22, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v19, s28 -; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 
@@ -73663,6 +74469,22 @@ define inreg <64 x i16> @bitcast_v32f32_to_v64i16_scalar(<32 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 +; SI-NEXT: v_mov_b32_e32 v31, s16 +; SI-NEXT: v_mov_b32_e32 v32, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v27, s20 +; SI-NEXT: v_mov_b32_e32 v28, s21 +; SI-NEXT: v_mov_b32_e32 v25, s22 +; SI-NEXT: v_mov_b32_e32 v26, s23 +; SI-NEXT: v_mov_b32_e32 v23, s24 +; SI-NEXT: v_mov_b32_e32 v24, s25 +; SI-NEXT: v_mov_b32_e32 v21, s26 +; SI-NEXT: v_mov_b32_e32 v22, s27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_mov_b32_e32 v19, s28 +; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[36:37], v[17:18], 16 @@ -75822,84 +76644,155 @@ define inreg <32 x float> @bitcast_v64i16_to_v32f32_scalar(<64 x i16> inreg %a, ; GFX11-LABEL: bitcast_v64i16_to_v32f32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 
off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction 
; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -80720,24 +81613,43 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 
offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -81437,42 +82349,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: 
v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s98, 34 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -81492,7 +82405,6 @@ define inreg <128 x i8> 
@bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB57_4 @@ -82362,6 +83274,7 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v22, 11 ; SI-NEXT: v_readlane_b32 s17, v22, 17 ; SI-NEXT: v_readlane_b32 s15, v22, 23 @@ -82369,42 +83282,41 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: v_readlane_b32 s11, v22, 35 ; SI-NEXT: v_readlane_b32 s9, v22, 39 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 
11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 
4-byte Folded Reload @@ -82601,38 +83513,39 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: 
v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -82652,7 +83565,6 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB57_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -83441,43 +84353,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: v_readlane_b32 s15, v21, 1 ; VI-NEXT: v_readlane_b32 s13, v21, 3 ; VI-NEXT: v_readlane_b32 s11, v21, 5 ; VI-NEXT: v_readlane_b32 s9, v21, 7 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 
30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 -; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; 
VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -83650,42 +84562,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_writelane_b32 v20, s31, 1 -; GFX9-NEXT: v_writelane_b32 v20, s34, 2 -; GFX9-NEXT: v_writelane_b32 v20, s35, 3 -; GFX9-NEXT: v_writelane_b32 v20, s36, 4 -; GFX9-NEXT: v_writelane_b32 v20, s37, 5 -; GFX9-NEXT: v_writelane_b32 v20, s38, 6 -; GFX9-NEXT: v_writelane_b32 v20, s39, 7 -; GFX9-NEXT: v_writelane_b32 v20, s48, 8 -; GFX9-NEXT: v_writelane_b32 v20, s49, 9 -; GFX9-NEXT: v_writelane_b32 v20, s50, 10 -; GFX9-NEXT: v_writelane_b32 v20, s51, 11 -; GFX9-NEXT: v_writelane_b32 v20, s52, 12 -; GFX9-NEXT: v_writelane_b32 v20, s53, 13 -; GFX9-NEXT: v_writelane_b32 v20, s54, 14 -; GFX9-NEXT: v_writelane_b32 v20, s55, 15 -; GFX9-NEXT: v_writelane_b32 v20, s64, 16 -; GFX9-NEXT: v_writelane_b32 v20, s65, 17 -; GFX9-NEXT: v_writelane_b32 v20, s66, 18 -; GFX9-NEXT: v_writelane_b32 v20, s67, 19 -; GFX9-NEXT: v_writelane_b32 v20, s68, 20 -; GFX9-NEXT: v_writelane_b32 v20, s69, 21 -; GFX9-NEXT: v_writelane_b32 v20, s70, 22 -; GFX9-NEXT: v_writelane_b32 v20, s71, 23 -; GFX9-NEXT: v_writelane_b32 v20, s80, 24 -; 
GFX9-NEXT: v_writelane_b32 v20, s81, 25 -; GFX9-NEXT: v_writelane_b32 v20, s82, 26 -; GFX9-NEXT: v_writelane_b32 v20, s83, 27 -; GFX9-NEXT: v_writelane_b32 v20, s84, 28 -; GFX9-NEXT: v_writelane_b32 v20, s85, 29 -; GFX9-NEXT: v_writelane_b32 v20, s86, 30 -; GFX9-NEXT: v_writelane_b32 v20, s87, 31 -; GFX9-NEXT: v_writelane_b32 v20, s96, 32 -; GFX9-NEXT: v_writelane_b32 v20, s97, 33 +; GFX9-NEXT: v_writelane_b32 v20, s34, 0 +; GFX9-NEXT: v_writelane_b32 v20, s35, 1 +; GFX9-NEXT: v_writelane_b32 v20, s36, 2 +; GFX9-NEXT: v_writelane_b32 v20, s37, 3 +; GFX9-NEXT: v_writelane_b32 v20, s38, 4 +; GFX9-NEXT: v_writelane_b32 v20, s39, 5 +; GFX9-NEXT: v_writelane_b32 v20, s48, 6 +; GFX9-NEXT: v_writelane_b32 v20, s49, 7 +; GFX9-NEXT: v_writelane_b32 v20, s50, 8 +; GFX9-NEXT: v_writelane_b32 v20, s51, 9 +; GFX9-NEXT: v_writelane_b32 v20, s52, 10 +; GFX9-NEXT: v_writelane_b32 v20, s53, 11 +; GFX9-NEXT: v_writelane_b32 v20, s54, 12 +; GFX9-NEXT: v_writelane_b32 v20, s55, 13 +; GFX9-NEXT: v_writelane_b32 v20, s64, 14 +; GFX9-NEXT: v_writelane_b32 v20, s65, 15 +; GFX9-NEXT: v_writelane_b32 v20, s66, 16 +; GFX9-NEXT: v_writelane_b32 v20, s67, 17 +; GFX9-NEXT: v_writelane_b32 v20, s68, 18 +; GFX9-NEXT: v_writelane_b32 v20, s69, 19 +; GFX9-NEXT: v_writelane_b32 v20, s70, 20 +; GFX9-NEXT: v_writelane_b32 v20, s71, 21 +; GFX9-NEXT: v_writelane_b32 v20, s80, 22 +; GFX9-NEXT: v_writelane_b32 v20, s81, 23 +; GFX9-NEXT: v_writelane_b32 v20, s82, 24 +; GFX9-NEXT: v_writelane_b32 v20, s83, 25 +; GFX9-NEXT: v_writelane_b32 v20, s84, 26 +; GFX9-NEXT: v_writelane_b32 v20, s85, 27 +; GFX9-NEXT: v_writelane_b32 v20, s86, 28 +; GFX9-NEXT: v_writelane_b32 v20, s87, 29 +; GFX9-NEXT: v_writelane_b32 v20, s96, 30 +; GFX9-NEXT: v_writelane_b32 v20, s97, 31 +; GFX9-NEXT: v_writelane_b32 v20, s98, 32 +; GFX9-NEXT: v_writelane_b32 v20, s99, 33 +; GFX9-NEXT: v_writelane_b32 v20, s30, 34 +; GFX9-NEXT: v_writelane_b32 v20, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v20, 
s98, 34 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -83705,7 +84618,6 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: v_writelane_b32 v20, s99, 35 ; GFX9-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB57_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -84439,44 +85351,44 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:120 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v20, 34 ; GFX9-NEXT: v_readlane_b32 s9, v21, 1 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:124 -; GFX9-NEXT: v_readlane_b32 s99, v20, 35 -; GFX9-NEXT: v_readlane_b32 s98, v20, 34 -; GFX9-NEXT: v_readlane_b32 s97, v20, 33 -; GFX9-NEXT: v_readlane_b32 s96, v20, 32 -; GFX9-NEXT: v_readlane_b32 s87, v20, 31 -; GFX9-NEXT: v_readlane_b32 s86, v20, 30 -; GFX9-NEXT: v_readlane_b32 s85, v20, 29 -; GFX9-NEXT: v_readlane_b32 s84, v20, 28 -; GFX9-NEXT: v_readlane_b32 s83, v20, 27 -; GFX9-NEXT: v_readlane_b32 s82, v20, 26 -; GFX9-NEXT: v_readlane_b32 s81, v20, 25 -; GFX9-NEXT: v_readlane_b32 s80, v20, 24 -; GFX9-NEXT: v_readlane_b32 s71, v20, 23 -; GFX9-NEXT: v_readlane_b32 s70, v20, 22 -; GFX9-NEXT: v_readlane_b32 s69, v20, 21 -; GFX9-NEXT: v_readlane_b32 s68, v20, 20 -; GFX9-NEXT: v_readlane_b32 s67, v20, 19 -; GFX9-NEXT: v_readlane_b32 s66, v20, 18 -; GFX9-NEXT: v_readlane_b32 s65, v20, 17 -; GFX9-NEXT: v_readlane_b32 s64, v20, 16 -; GFX9-NEXT: v_readlane_b32 s55, v20, 15 -; GFX9-NEXT: v_readlane_b32 s54, v20, 14 -; GFX9-NEXT: v_readlane_b32 s53, v20, 13 -; GFX9-NEXT: v_readlane_b32 s52, v20, 12 -; GFX9-NEXT: v_readlane_b32 s51, v20, 11 -; GFX9-NEXT: 
v_readlane_b32 s50, v20, 10 -; GFX9-NEXT: v_readlane_b32 s49, v20, 9 -; GFX9-NEXT: v_readlane_b32 s48, v20, 8 -; GFX9-NEXT: v_readlane_b32 s39, v20, 7 -; GFX9-NEXT: v_readlane_b32 s38, v20, 6 -; GFX9-NEXT: v_readlane_b32 s37, v20, 5 -; GFX9-NEXT: v_readlane_b32 s36, v20, 4 -; GFX9-NEXT: v_readlane_b32 s35, v20, 3 -; GFX9-NEXT: v_readlane_b32 s34, v20, 2 -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 -; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 35 +; GFX9-NEXT: v_readlane_b32 s99, v20, 33 +; GFX9-NEXT: v_readlane_b32 s98, v20, 32 +; GFX9-NEXT: v_readlane_b32 s97, v20, 31 +; GFX9-NEXT: v_readlane_b32 s96, v20, 30 +; GFX9-NEXT: v_readlane_b32 s87, v20, 29 +; GFX9-NEXT: v_readlane_b32 s86, v20, 28 +; GFX9-NEXT: v_readlane_b32 s85, v20, 27 +; GFX9-NEXT: v_readlane_b32 s84, v20, 26 +; GFX9-NEXT: v_readlane_b32 s83, v20, 25 +; GFX9-NEXT: v_readlane_b32 s82, v20, 24 +; GFX9-NEXT: v_readlane_b32 s81, v20, 23 +; GFX9-NEXT: v_readlane_b32 s80, v20, 22 +; GFX9-NEXT: v_readlane_b32 s71, v20, 21 +; GFX9-NEXT: v_readlane_b32 s70, v20, 20 +; GFX9-NEXT: v_readlane_b32 s69, v20, 19 +; GFX9-NEXT: v_readlane_b32 s68, v20, 18 +; GFX9-NEXT: v_readlane_b32 s67, v20, 17 +; GFX9-NEXT: v_readlane_b32 s66, v20, 16 +; GFX9-NEXT: v_readlane_b32 s65, v20, 15 +; GFX9-NEXT: v_readlane_b32 s64, v20, 14 +; GFX9-NEXT: v_readlane_b32 s55, v20, 13 +; GFX9-NEXT: v_readlane_b32 s54, v20, 12 +; GFX9-NEXT: v_readlane_b32 s53, v20, 11 +; GFX9-NEXT: v_readlane_b32 s52, v20, 10 +; GFX9-NEXT: v_readlane_b32 s51, v20, 9 +; GFX9-NEXT: v_readlane_b32 s50, v20, 8 +; GFX9-NEXT: v_readlane_b32 s49, v20, 7 +; GFX9-NEXT: v_readlane_b32 s48, v20, 6 +; GFX9-NEXT: v_readlane_b32 s39, v20, 5 +; GFX9-NEXT: v_readlane_b32 s38, v20, 4 +; GFX9-NEXT: v_readlane_b32 s37, v20, 3 +; GFX9-NEXT: v_readlane_b32 s36, v20, 2 +; GFX9-NEXT: v_readlane_b32 s35, v20, 1 +; GFX9-NEXT: v_readlane_b32 s34, v20, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 
4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -84646,66 +85558,66 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v16, s30, 0 -; GFX11-NEXT: v_writelane_b32 v17, s96, 0 +; GFX11-NEXT: v_writelane_b32 v16, s34, 0 +; GFX11-NEXT: v_writelane_b32 v16, s35, 1 +; GFX11-NEXT: v_writelane_b32 v16, s36, 2 +; GFX11-NEXT: v_writelane_b32 v16, s37, 3 +; GFX11-NEXT: v_writelane_b32 v16, s38, 4 +; GFX11-NEXT: v_writelane_b32 v16, s39, 5 +; GFX11-NEXT: v_writelane_b32 v16, s48, 6 +; GFX11-NEXT: v_writelane_b32 v16, s49, 7 +; GFX11-NEXT: v_writelane_b32 v16, s50, 8 +; GFX11-NEXT: v_writelane_b32 v16, s51, 9 +; GFX11-NEXT: v_writelane_b32 v16, s52, 10 +; GFX11-NEXT: v_writelane_b32 v16, s53, 11 +; GFX11-NEXT: v_writelane_b32 v16, s54, 12 +; GFX11-NEXT: v_writelane_b32 v16, s55, 13 +; GFX11-NEXT: v_writelane_b32 v16, s64, 14 +; GFX11-NEXT: v_writelane_b32 v16, s65, 15 +; GFX11-NEXT: v_writelane_b32 v16, s66, 16 +; GFX11-NEXT: v_writelane_b32 v16, s67, 17 +; GFX11-NEXT: v_writelane_b32 v16, s68, 18 +; GFX11-NEXT: v_writelane_b32 v16, s69, 19 +; GFX11-NEXT: v_writelane_b32 v16, s70, 20 +; GFX11-NEXT: v_writelane_b32 v16, s71, 21 +; GFX11-NEXT: v_writelane_b32 v16, s80, 22 +; GFX11-NEXT: v_writelane_b32 v16, s81, 23 +; GFX11-NEXT: v_writelane_b32 v16, s82, 24 +; GFX11-NEXT: v_writelane_b32 v16, s83, 25 +; GFX11-NEXT: v_writelane_b32 v16, s84, 26 +; GFX11-NEXT: v_writelane_b32 v16, s85, 27 +; GFX11-NEXT: v_writelane_b32 v16, s86, 28 +; GFX11-NEXT: v_writelane_b32 v16, s87, 29 +; GFX11-NEXT: v_writelane_b32 v16, s96, 30 +; GFX11-NEXT: v_writelane_b32 v16, s97, 31 +; GFX11-NEXT: v_writelane_b32 v17, s98, 0 +; GFX11-NEXT: v_writelane_b32 v17, s99, 1 +; GFX11-NEXT: v_writelane_b32 v17, s100, 2 +; GFX11-NEXT: v_writelane_b32 v17, s101, 3 +; 
GFX11-NEXT: v_writelane_b32 v17, s102, 4 +; GFX11-NEXT: v_writelane_b32 v17, s103, 5 +; GFX11-NEXT: v_writelane_b32 v17, s104, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-NEXT: v_readfirstlane_b32 s40, v1 ; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v16, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s97, 1 ; GFX11-NEXT: v_readfirstlane_b32 s14, v3 ; GFX11-NEXT: v_readfirstlane_b32 s15, v4 ; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v16, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s98, 2 ; GFX11-NEXT: v_readfirstlane_b32 s13, v6 ; GFX11-NEXT: v_readfirstlane_b32 s10, v7 ; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v16, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s99, 3 ; GFX11-NEXT: v_readfirstlane_b32 s8, v9 ; GFX11-NEXT: v_readfirstlane_b32 s9, v10 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v16, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s100, 4 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: v_readfirstlane_b32 s4, v13 ; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v16, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s101, 5 ; GFX11-NEXT: s_mov_b32 s101, 0 ; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr19 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v16, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s102, 6 -; GFX11-NEXT: v_writelane_b32 v16, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s103, 7 -; GFX11-NEXT: v_writelane_b32 v16, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s104, 8 -; GFX11-NEXT: v_writelane_b32 v16, s49, 9 -; GFX11-NEXT: v_writelane_b32 v16, s50, 10 -; GFX11-NEXT: v_writelane_b32 v16, s51, 11 -; GFX11-NEXT: v_writelane_b32 v16, s52, 12 -; GFX11-NEXT: v_writelane_b32 v16, s53, 13 -; GFX11-NEXT: v_writelane_b32 
v16, s54, 14 -; GFX11-NEXT: v_writelane_b32 v16, s55, 15 -; GFX11-NEXT: v_writelane_b32 v16, s64, 16 -; GFX11-NEXT: v_writelane_b32 v16, s65, 17 -; GFX11-NEXT: v_writelane_b32 v16, s66, 18 -; GFX11-NEXT: v_writelane_b32 v16, s67, 19 -; GFX11-NEXT: v_writelane_b32 v16, s68, 20 -; GFX11-NEXT: v_writelane_b32 v16, s69, 21 -; GFX11-NEXT: v_writelane_b32 v16, s70, 22 -; GFX11-NEXT: v_writelane_b32 v16, s71, 23 -; GFX11-NEXT: v_writelane_b32 v16, s80, 24 -; GFX11-NEXT: v_writelane_b32 v16, s81, 25 -; GFX11-NEXT: v_writelane_b32 v16, s82, 26 -; GFX11-NEXT: v_writelane_b32 v16, s83, 27 -; GFX11-NEXT: v_writelane_b32 v16, s84, 28 -; GFX11-NEXT: v_writelane_b32 v16, s85, 29 -; GFX11-NEXT: v_writelane_b32 v16, s86, 30 -; GFX11-NEXT: v_writelane_b32 v16, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB57_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s25, 8 @@ -85234,9 +86146,9 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff ; GFX11-NEXT: v_readlane_b32 s17, v19, 29 ; GFX11-NEXT: s_and_b32 s16, s16, 0xff -; GFX11-NEXT: v_readlane_b32 s100, v17, 4 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s99, v17, 3 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 ; GFX11-NEXT: s_or_b32 s2, s2, s3 ; GFX11-NEXT: s_and_b32 s3, s18, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 16 @@ -85270,7 +86182,7 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s16, s16, s17 ; GFX11-NEXT: v_readlane_b32 s17, v19, 21 ; GFX11-NEXT: s_lshl_b32 s16, s16, 16 -; GFX11-NEXT: v_readlane_b32 s98, v17, 2 +; GFX11-NEXT: v_readlane_b32 s100, v17, 2 ; GFX11-NEXT: s_or_b32 s3, s3, s16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v7, s2 :: v_dual_mov_b32 v8, s3 @@ -85290,12 +86202,12 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s0, s0, 
s1 ; GFX11-NEXT: s_and_b32 s1, s21, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 8 -; GFX11-NEXT: v_readlane_b32 s86, v16, 30 +; GFX11-NEXT: v_readlane_b32 s99, v17, 1 ; GFX11-NEXT: s_or_b32 s1, s1, s2 ; GFX11-NEXT: v_readlane_b32 s2, v19, 25 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-NEXT: v_readlane_b32 s31, v16, 1 -; GFX11-NEXT: v_readlane_b32 s30, v16, 0 +; GFX11-NEXT: v_readlane_b32 s98, v17, 0 +; GFX11-NEXT: v_readlane_b32 s86, v16, 28 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s2, s3 @@ -85332,9 +86244,9 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_lshl_b32 s3, s88, 8 ; GFX11-NEXT: s_and_b32 s16, s69, 0xff ; GFX11-NEXT: s_and_b32 s18, s72, 0xff -; GFX11-NEXT: v_readlane_b32 s97, v17, 1 +; GFX11-NEXT: v_readlane_b32 s97, v16, 31 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s69, v16, 21 +; GFX11-NEXT: v_readlane_b32 s69, v16, 19 ; GFX11-NEXT: s_or_b32 s1, s2, s3 ; GFX11-NEXT: v_readlane_b32 s3, v19, 16 ; GFX11-NEXT: s_and_b32 s2, s25, 0xff @@ -85541,39 +86453,39 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:112 -; GFX11-NEXT: v_readlane_b32 s104, v17, 8 -; GFX11-NEXT: v_readlane_b32 s103, v17, 7 -; GFX11-NEXT: v_readlane_b32 s102, v17, 6 -; GFX11-NEXT: v_readlane_b32 s101, v17, 5 -; GFX11-NEXT: v_readlane_b32 s96, v17, 0 -; GFX11-NEXT: v_readlane_b32 s87, v16, 31 -; GFX11-NEXT: v_readlane_b32 s85, v16, 29 -; GFX11-NEXT: v_readlane_b32 s84, v16, 28 -; GFX11-NEXT: v_readlane_b32 s83, v16, 27 -; GFX11-NEXT: v_readlane_b32 s82, v16, 26 -; GFX11-NEXT: v_readlane_b32 s81, v16, 25 -; GFX11-NEXT: v_readlane_b32 s80, v16, 24 -; GFX11-NEXT: v_readlane_b32 s71, v16, 23 -; 
GFX11-NEXT: v_readlane_b32 s70, v16, 22 -; GFX11-NEXT: v_readlane_b32 s68, v16, 20 -; GFX11-NEXT: v_readlane_b32 s67, v16, 19 -; GFX11-NEXT: v_readlane_b32 s66, v16, 18 -; GFX11-NEXT: v_readlane_b32 s65, v16, 17 -; GFX11-NEXT: v_readlane_b32 s64, v16, 16 -; GFX11-NEXT: v_readlane_b32 s55, v16, 15 -; GFX11-NEXT: v_readlane_b32 s54, v16, 14 -; GFX11-NEXT: v_readlane_b32 s53, v16, 13 -; GFX11-NEXT: v_readlane_b32 s52, v16, 12 -; GFX11-NEXT: v_readlane_b32 s51, v16, 11 -; GFX11-NEXT: v_readlane_b32 s50, v16, 10 -; GFX11-NEXT: v_readlane_b32 s49, v16, 9 -; GFX11-NEXT: v_readlane_b32 s48, v16, 8 -; GFX11-NEXT: v_readlane_b32 s39, v16, 7 -; GFX11-NEXT: v_readlane_b32 s38, v16, 6 -; GFX11-NEXT: v_readlane_b32 s37, v16, 5 -; GFX11-NEXT: v_readlane_b32 s36, v16, 4 -; GFX11-NEXT: v_readlane_b32 s35, v16, 3 -; GFX11-NEXT: v_readlane_b32 s34, v16, 2 +; GFX11-NEXT: v_readlane_b32 s104, v17, 6 +; GFX11-NEXT: v_readlane_b32 s103, v17, 5 +; GFX11-NEXT: v_readlane_b32 s102, v17, 4 +; GFX11-NEXT: v_readlane_b32 s101, v17, 3 +; GFX11-NEXT: v_readlane_b32 s96, v16, 30 +; GFX11-NEXT: v_readlane_b32 s87, v16, 29 +; GFX11-NEXT: v_readlane_b32 s85, v16, 27 +; GFX11-NEXT: v_readlane_b32 s84, v16, 26 +; GFX11-NEXT: v_readlane_b32 s83, v16, 25 +; GFX11-NEXT: v_readlane_b32 s82, v16, 24 +; GFX11-NEXT: v_readlane_b32 s81, v16, 23 +; GFX11-NEXT: v_readlane_b32 s80, v16, 22 +; GFX11-NEXT: v_readlane_b32 s71, v16, 21 +; GFX11-NEXT: v_readlane_b32 s70, v16, 20 +; GFX11-NEXT: v_readlane_b32 s68, v16, 18 +; GFX11-NEXT: v_readlane_b32 s67, v16, 17 +; GFX11-NEXT: v_readlane_b32 s66, v16, 16 +; GFX11-NEXT: v_readlane_b32 s65, v16, 15 +; GFX11-NEXT: v_readlane_b32 s64, v16, 14 +; GFX11-NEXT: v_readlane_b32 s55, v16, 13 +; GFX11-NEXT: v_readlane_b32 s54, v16, 12 +; GFX11-NEXT: v_readlane_b32 s53, v16, 11 +; GFX11-NEXT: v_readlane_b32 s52, v16, 10 +; GFX11-NEXT: v_readlane_b32 s51, v16, 9 +; GFX11-NEXT: v_readlane_b32 s50, v16, 8 +; GFX11-NEXT: v_readlane_b32 s49, v16, 7 +; GFX11-NEXT: v_readlane_b32 s48, 
v16, 6 +; GFX11-NEXT: v_readlane_b32 s39, v16, 5 +; GFX11-NEXT: v_readlane_b32 s38, v16, 4 +; GFX11-NEXT: v_readlane_b32 s37, v16, 3 +; GFX11-NEXT: v_readlane_b32 s36, v16, 2 +; GFX11-NEXT: v_readlane_b32 s35, v16, 1 +; GFX11-NEXT: v_readlane_b32 s34, v16, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v16, off, s32 @@ -90340,56 +91252,105 @@ define <16 x i64> @bitcast_v128i8_to_v16i64(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: 
v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -94661,45 +95622,83 @@ define inreg <16 x i64> @bitcast_v128i8_to_v16i64_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -95602,45 +96601,83 @@ define inreg <16 x i64> @bitcast_v128i8_to_v16i64_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -97473,43 +98510,43 @@ define inreg <64 x bfloat> @bitcast_v16i64_to_v64bf16_scalar(<16 x i64> inreg %a ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: 
v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: 
v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s70, v1 ; SI-NEXT: v_readfirstlane_b32 s71, v2 ; SI-NEXT: v_readfirstlane_b32 s80, v3 @@ -97930,43 +98967,43 @@ define inreg <64 x bfloat> @bitcast_v16i64_to_v64bf16_scalar(<16 x i64> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s4 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; 
SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -100095,20 +101132,35 @@ define <16 x i64> @bitcast_v64bf16_to_v16i64(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -103144,81 +104196,149 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, 
v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -104118,83 +105238,153 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 
off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, 
v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -105970,12 
+107160,26 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s30, 2 +; SI-NEXT: v_writelane_b32 v63, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s35, 3 ; SI-NEXT: v_readfirstlane_b32 s46, v1 ; SI-NEXT: v_readfirstlane_b32 s47, v2 ; SI-NEXT: 
v_readfirstlane_b32 s44, v3 @@ -105995,21 +107199,6 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB65_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -106251,15 +107440,15 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s35, v63, 3 +; SI-NEXT: v_readlane_b32 s30, v63, 2 ; SI-NEXT: v_lshlrev_b32_e32 v61, 16, v61 ; SI-NEXT: v_or_b32_e32 v2, v2, v61 ; 
SI-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v2, v59 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v0 @@ -108708,84 +109897,155 @@ define inreg <16 x i64> @bitcast_v64f16_to_v16i64_scalar(<64 x half> inreg %a, i ; GFX11-LABEL: bitcast_v64f16_to_v16i64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, 
s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, 
s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -109547,28 +110807,29 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: 
v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s30, 20 +; SI-NEXT: v_writelane_b32 v20, s31, 21 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -109588,7 +110849,6 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; 
SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s69, 21 ; SI-NEXT: s_cbranch_scc0 .LBB69_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 16 @@ -109907,29 +111167,29 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 20 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 21 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: 
v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -111772,84 +113032,155 @@ define inreg <16 x i64> @bitcast_v64i16_to_v16i64_scalar(<64 x i16> inreg %a, i3 ; GFX11-LABEL: bitcast_v64i16_to_v16i64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, 
s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -115639,24 +116970,43 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: 
scratch_load_b32 v33, off, s32 offset:8 @@ -116330,44 +117680,56 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, 
s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: v_readfirstlane_b32 s5, v2 ; SI-NEXT: v_readfirstlane_b32 s6, v3 @@ -116387,19 +117749,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s44, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s45, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR 
spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB73_3 @@ -117485,42 +118834,42 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: v_and_b32_e32 v5, 0xff, v5 ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: 
v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v26, 0xff, v26 ; SI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 @@ -118048,39 +119397,53 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, 
s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded 
Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s6, v1 ; VI-NEXT: v_readfirstlane_b32 s7, v2 ; VI-NEXT: v_readfirstlane_b32 s8, v3 @@ -118100,20 +119463,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: 
buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB73_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -118910,38 +120259,38 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload ; VI-NEXT: v_or_b32_sdwa v32, v32, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: 
v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(2) ; 
VI-NEXT: v_lshlrev_b32_e32 v34, 8, v42 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -119332,43 +120681,57 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 
+; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s6, v1 ; GFX9-NEXT: v_readfirstlane_b32 s7, v2 ; GFX9-NEXT: v_readfirstlane_b32 s8, v3 @@ -119388,20 +120751,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, 
s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB73_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -120224,42 +121573,42 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: v_or_b32_sdwa v18, v38, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v38, 8, v41 ; GFX9-NEXT: v_or_b32_sdwa v35, v35, v38 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 
s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v36, 8, v36 @@ -120617,87 +121966,106 @@ define 
inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:92 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v76, s30, 0 -; GFX11-NEXT: v_writelane_b32 v77, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s4, v1 -; GFX11-NEXT: v_readfirstlane_b32 s5, v2 -; GFX11-NEXT: v_writelane_b32 v76, s31, 1 -; GFX11-NEXT: v_writelane_b32 v77, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 -; GFX11-NEXT: v_readfirstlane_b32 s7, v4 -; GFX11-NEXT: v_readfirstlane_b32 s8, v5 -; GFX11-NEXT: v_writelane_b32 v76, s34, 2 -; GFX11-NEXT: v_writelane_b32 v77, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s9, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v76, s35, 3 -; GFX11-NEXT: v_writelane_b32 v77, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s12, v9 -; GFX11-NEXT: v_readfirstlane_b32 s13, v10 -; GFX11-NEXT: v_readfirstlane_b32 s14, v11 -; GFX11-NEXT: v_writelane_b32 v76, s36, 4 -; GFX11-NEXT: v_writelane_b32 v77, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s15, v12 -; GFX11-NEXT: v_readfirstlane_b32 s40, v13 -; GFX11-NEXT: v_readfirstlane_b32 s41, v14 -; GFX11-NEXT: v_writelane_b32 v76, s37, 5 -; GFX11-NEXT: v_writelane_b32 v77, s101, 5 -; GFX11-NEXT: s_mov_b32 vcc_hi, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x13 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:56 +; GFX11-NEXT: ; 
meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 -; GFX11-NEXT: v_writelane_b32 v76, s38, 6 -; GFX11-NEXT: v_writelane_b32 v77, s102, 6 +; GFX11-NEXT: v_writelane_b32 v76, s34, 0 +; GFX11-NEXT: v_writelane_b32 v76, s35, 1 +; GFX11-NEXT: v_writelane_b32 v76, s36, 2 +; GFX11-NEXT: v_writelane_b32 v76, s37, 3 +; GFX11-NEXT: v_writelane_b32 v76, s38, 4 +; GFX11-NEXT: v_writelane_b32 v76, s39, 5 +; GFX11-NEXT: v_writelane_b32 v76, s48, 6 +; GFX11-NEXT: v_writelane_b32 v76, s49, 7 +; GFX11-NEXT: v_writelane_b32 v76, s50, 8 +; GFX11-NEXT: v_writelane_b32 v76, s51, 9 +; GFX11-NEXT: v_writelane_b32 v76, s52, 10 +; GFX11-NEXT: v_writelane_b32 v76, s53, 11 +; GFX11-NEXT: v_writelane_b32 v76, s54, 12 +; GFX11-NEXT: v_writelane_b32 v76, s55, 13 +; GFX11-NEXT: v_writelane_b32 v76, s64, 14 +; GFX11-NEXT: v_writelane_b32 
v76, s65, 15 +; GFX11-NEXT: v_writelane_b32 v76, s66, 16 +; GFX11-NEXT: v_writelane_b32 v76, s67, 17 +; GFX11-NEXT: v_writelane_b32 v76, s68, 18 +; GFX11-NEXT: v_writelane_b32 v76, s69, 19 +; GFX11-NEXT: v_writelane_b32 v76, s70, 20 +; GFX11-NEXT: v_writelane_b32 v76, s71, 21 +; GFX11-NEXT: v_writelane_b32 v76, s80, 22 +; GFX11-NEXT: v_writelane_b32 v76, s81, 23 +; GFX11-NEXT: v_writelane_b32 v76, s82, 24 +; GFX11-NEXT: v_writelane_b32 v76, s83, 25 +; GFX11-NEXT: v_writelane_b32 v76, s84, 26 +; GFX11-NEXT: v_writelane_b32 v76, s85, 27 +; GFX11-NEXT: v_writelane_b32 v76, s86, 28 +; GFX11-NEXT: v_writelane_b32 v76, s87, 29 +; GFX11-NEXT: v_writelane_b32 v76, s96, 30 +; GFX11-NEXT: v_writelane_b32 v76, s97, 31 +; GFX11-NEXT: v_writelane_b32 v77, s98, 0 +; GFX11-NEXT: v_writelane_b32 v77, s99, 1 +; GFX11-NEXT: v_writelane_b32 v77, s100, 2 +; GFX11-NEXT: v_writelane_b32 v77, s101, 3 +; GFX11-NEXT: v_writelane_b32 v77, s102, 4 +; GFX11-NEXT: v_writelane_b32 v77, s103, 5 +; GFX11-NEXT: v_writelane_b32 v77, s104, 6 +; GFX11-NEXT: v_writelane_b32 v77, s30, 7 +; GFX11-NEXT: v_writelane_b32 v77, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s4, v1 +; GFX11-NEXT: v_readfirstlane_b32 s5, v2 +; GFX11-NEXT: v_readfirstlane_b32 s6, v3 +; GFX11-NEXT: v_readfirstlane_b32 s7, v4 +; GFX11-NEXT: v_readfirstlane_b32 s8, v5 +; GFX11-NEXT: v_readfirstlane_b32 s9, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s12, v9 +; GFX11-NEXT: v_readfirstlane_b32 s13, v10 +; GFX11-NEXT: v_readfirstlane_b32 s14, v11 +; GFX11-NEXT: v_readfirstlane_b32 s15, v12 +; GFX11-NEXT: v_readfirstlane_b32 s40, v13 +; GFX11-NEXT: v_readfirstlane_b32 s41, v14 +; GFX11-NEXT: s_mov_b32 vcc_hi, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr79 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 
v76, s39, 7 -; GFX11-NEXT: v_writelane_b32 v77, s103, 7 -; GFX11-NEXT: v_writelane_b32 v76, s48, 8 -; GFX11-NEXT: v_writelane_b32 v77, s104, 8 -; GFX11-NEXT: v_writelane_b32 v76, s49, 9 -; GFX11-NEXT: v_writelane_b32 v76, s50, 10 -; GFX11-NEXT: v_writelane_b32 v76, s51, 11 -; GFX11-NEXT: v_writelane_b32 v76, s52, 12 -; GFX11-NEXT: v_writelane_b32 v76, s53, 13 -; GFX11-NEXT: v_writelane_b32 v76, s54, 14 -; GFX11-NEXT: v_writelane_b32 v76, s55, 15 -; GFX11-NEXT: v_writelane_b32 v76, s64, 16 -; GFX11-NEXT: v_writelane_b32 v76, s65, 17 -; GFX11-NEXT: v_writelane_b32 v76, s66, 18 -; GFX11-NEXT: v_writelane_b32 v76, s67, 19 -; GFX11-NEXT: v_writelane_b32 v76, s68, 20 -; GFX11-NEXT: v_writelane_b32 v76, s69, 21 -; GFX11-NEXT: v_writelane_b32 v76, s70, 22 -; GFX11-NEXT: v_writelane_b32 v76, s71, 23 -; GFX11-NEXT: v_writelane_b32 v76, s80, 24 -; GFX11-NEXT: v_writelane_b32 v76, s81, 25 -; GFX11-NEXT: v_writelane_b32 v76, s82, 26 -; GFX11-NEXT: v_writelane_b32 v76, s83, 27 -; GFX11-NEXT: v_writelane_b32 v76, s84, 28 -; GFX11-NEXT: v_writelane_b32 v76, s85, 29 -; GFX11-NEXT: v_writelane_b32 v76, s86, 30 -; GFX11-NEXT: v_writelane_b32 v76, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB73_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s13, 16 @@ -121575,47 +122943,47 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:72 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:76 -; GFX11-NEXT: v_readlane_b32 s104, v77, 8 -; GFX11-NEXT: v_readlane_b32 s103, v77, 7 -; GFX11-NEXT: v_readlane_b32 s102, v77, 6 -; GFX11-NEXT: v_readlane_b32 s101, v77, 5 -; GFX11-NEXT: v_readlane_b32 s100, v77, 4 -; GFX11-NEXT: v_readlane_b32 s99, v77, 3 -; GFX11-NEXT: v_readlane_b32 s98, v77, 2 -; GFX11-NEXT: v_readlane_b32 s97, v77, 1 -; GFX11-NEXT: v_readlane_b32 s96, v77, 0 -; GFX11-NEXT: v_readlane_b32 s87, v76, 31 -; GFX11-NEXT: v_readlane_b32 
s86, v76, 30 -; GFX11-NEXT: v_readlane_b32 s85, v76, 29 -; GFX11-NEXT: v_readlane_b32 s84, v76, 28 -; GFX11-NEXT: v_readlane_b32 s83, v76, 27 -; GFX11-NEXT: v_readlane_b32 s82, v76, 26 -; GFX11-NEXT: v_readlane_b32 s81, v76, 25 -; GFX11-NEXT: v_readlane_b32 s80, v76, 24 -; GFX11-NEXT: v_readlane_b32 s71, v76, 23 -; GFX11-NEXT: v_readlane_b32 s70, v76, 22 -; GFX11-NEXT: v_readlane_b32 s69, v76, 21 -; GFX11-NEXT: v_readlane_b32 s68, v76, 20 -; GFX11-NEXT: v_readlane_b32 s67, v76, 19 -; GFX11-NEXT: v_readlane_b32 s66, v76, 18 -; GFX11-NEXT: v_readlane_b32 s65, v76, 17 -; GFX11-NEXT: v_readlane_b32 s64, v76, 16 -; GFX11-NEXT: v_readlane_b32 s55, v76, 15 -; GFX11-NEXT: v_readlane_b32 s54, v76, 14 -; GFX11-NEXT: v_readlane_b32 s53, v76, 13 -; GFX11-NEXT: v_readlane_b32 s52, v76, 12 -; GFX11-NEXT: v_readlane_b32 s51, v76, 11 -; GFX11-NEXT: v_readlane_b32 s50, v76, 10 -; GFX11-NEXT: v_readlane_b32 s49, v76, 9 -; GFX11-NEXT: v_readlane_b32 s48, v76, 8 -; GFX11-NEXT: v_readlane_b32 s39, v76, 7 -; GFX11-NEXT: v_readlane_b32 s38, v76, 6 -; GFX11-NEXT: v_readlane_b32 s37, v76, 5 -; GFX11-NEXT: v_readlane_b32 s36, v76, 4 -; GFX11-NEXT: v_readlane_b32 s35, v76, 3 -; GFX11-NEXT: v_readlane_b32 s34, v76, 2 -; GFX11-NEXT: v_readlane_b32 s31, v76, 1 -; GFX11-NEXT: v_readlane_b32 s30, v76, 0 +; GFX11-NEXT: v_readlane_b32 s30, v77, 7 +; GFX11-NEXT: v_readlane_b32 s31, v77, 8 +; GFX11-NEXT: v_readlane_b32 s104, v77, 6 +; GFX11-NEXT: v_readlane_b32 s103, v77, 5 +; GFX11-NEXT: v_readlane_b32 s102, v77, 4 +; GFX11-NEXT: v_readlane_b32 s101, v77, 3 +; GFX11-NEXT: v_readlane_b32 s100, v77, 2 +; GFX11-NEXT: v_readlane_b32 s99, v77, 1 +; GFX11-NEXT: v_readlane_b32 s98, v77, 0 +; GFX11-NEXT: v_readlane_b32 s97, v76, 31 +; GFX11-NEXT: v_readlane_b32 s96, v76, 30 +; GFX11-NEXT: v_readlane_b32 s87, v76, 29 +; GFX11-NEXT: v_readlane_b32 s86, v76, 28 +; GFX11-NEXT: v_readlane_b32 s85, v76, 27 +; GFX11-NEXT: v_readlane_b32 s84, v76, 26 +; GFX11-NEXT: v_readlane_b32 s83, v76, 25 +; GFX11-NEXT: 
v_readlane_b32 s82, v76, 24 +; GFX11-NEXT: v_readlane_b32 s81, v76, 23 +; GFX11-NEXT: v_readlane_b32 s80, v76, 22 +; GFX11-NEXT: v_readlane_b32 s71, v76, 21 +; GFX11-NEXT: v_readlane_b32 s70, v76, 20 +; GFX11-NEXT: v_readlane_b32 s69, v76, 19 +; GFX11-NEXT: v_readlane_b32 s68, v76, 18 +; GFX11-NEXT: v_readlane_b32 s67, v76, 17 +; GFX11-NEXT: v_readlane_b32 s66, v76, 16 +; GFX11-NEXT: v_readlane_b32 s65, v76, 15 +; GFX11-NEXT: v_readlane_b32 s64, v76, 14 +; GFX11-NEXT: v_readlane_b32 s55, v76, 13 +; GFX11-NEXT: v_readlane_b32 s54, v76, 12 +; GFX11-NEXT: v_readlane_b32 s53, v76, 11 +; GFX11-NEXT: v_readlane_b32 s52, v76, 10 +; GFX11-NEXT: v_readlane_b32 s51, v76, 9 +; GFX11-NEXT: v_readlane_b32 s50, v76, 8 +; GFX11-NEXT: v_readlane_b32 s49, v76, 7 +; GFX11-NEXT: v_readlane_b32 s48, v76, 6 +; GFX11-NEXT: v_readlane_b32 s39, v76, 5 +; GFX11-NEXT: v_readlane_b32 s38, v76, 4 +; GFX11-NEXT: v_readlane_b32 s37, v76, 3 +; GFX11-NEXT: v_readlane_b32 s36, v76, 2 +; GFX11-NEXT: v_readlane_b32 s35, v76, 1 +; GFX11-NEXT: v_readlane_b32 s34, v76, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:80 @@ -126382,56 +127750,105 @@ define <16 x double> @bitcast_v128i8_to_v16f64(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction 
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -130703,45 +132120,83 @@ define inreg <16 x double> @bitcast_v128i8_to_v16f64_scalar(<128 x i8> inreg %a, ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -131644,45 +133099,83 @@ define inreg <16 x double> @bitcast_v128i8_to_v16f64_scalar(<128 x i8> inreg %a, ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -133422,44 +134915,57 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; 
SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; 
SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: 
v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -133479,20 +134985,6 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s7, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: 
s_cbranch_scc0 .LBB77_3 ; SI-NEXT: ; %bb.1: ; %cmp.false @@ -133896,42 +135388,42 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v8 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: 
v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 @@ -136085,20 +137577,35 @@ define <16 x double> @bitcast_v64bf16_to_v16f64(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -139134,81 +140641,149 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 
offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 
offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 
offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 
offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -140108,83 +141683,153 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16f64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -141867,6 +143512,22 @@ define inreg <64 x half> @bitcast_v16f64_to_v64f16_scalar(<16 x double> inreg %a ; SI-LABEL: bitcast_v16f64_to_v64f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 
offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 @@ -141887,22 +143548,6 @@ define inreg <64 x half> @bitcast_v16f64_to_v64f16_scalar(<16 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB81_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s46, s5, 16 @@ -144578,84 +146223,155 @@ define inreg <16 x double> @bitcast_v64f16_to_v16f64_scalar(<64 x half> inreg %a ; GFX11-LABEL: bitcast_v64f16_to_v16f64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 
off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction 
; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: 
v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -145343,22 +147059,6 @@ define inreg <64 x i16> @bitcast_v16f64_to_v64i16_scalar(<16 x double> inreg %a, ; SI-LABEL: bitcast_v16f64_to_v64i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_mov_b32_e32 v31, s16 -; SI-NEXT: v_mov_b32_e32 v32, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; SI-NEXT: v_mov_b32_e32 v27, s20 -; SI-NEXT: v_mov_b32_e32 v28, s21 -; SI-NEXT: v_mov_b32_e32 v25, s22 -; SI-NEXT: v_mov_b32_e32 v26, s23 -; SI-NEXT: v_mov_b32_e32 v23, s24 -; SI-NEXT: v_mov_b32_e32 v24, s25 -; SI-NEXT: v_mov_b32_e32 v21, s26 -; SI-NEXT: v_mov_b32_e32 v22, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v19, s28 -; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -145375,6 +147075,22 @@ define inreg <64 x i16> @bitcast_v16f64_to_v64i16_scalar(<16 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 +; SI-NEXT: v_mov_b32_e32 v31, s16 +; SI-NEXT: v_mov_b32_e32 v32, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v27, s20 +; SI-NEXT: v_mov_b32_e32 v28, s21 +; SI-NEXT: v_mov_b32_e32 v25, s22 +; SI-NEXT: v_mov_b32_e32 v26, s23 +; SI-NEXT: v_mov_b32_e32 v23, s24 +; SI-NEXT: v_mov_b32_e32 v24, s25 +; SI-NEXT: v_mov_b32_e32 v21, s26 +; SI-NEXT: v_mov_b32_e32 v22, s27 +; SI-NEXT: s_and_b64 s[4:5], 
vcc, exec +; SI-NEXT: v_mov_b32_e32 v19, s28 +; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: s_cbranch_scc0 .LBB85_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v18 @@ -147486,84 +149202,155 @@ define inreg <16 x double> @bitcast_v64i16_to_v16f64_scalar(<64 x i16> inreg %a, ; GFX11-LABEL: bitcast_v64i16_to_v16f64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 
offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, 
s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -153092,53 +154879,99 @@ define <64 x bfloat> @bitcast_v128i8_to_v64bf16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -154031,6 +155864,43 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v41, s34, 0 +; SI-NEXT: v_writelane_b32 v41, s35, 1 +; SI-NEXT: v_writelane_b32 v41, s36, 2 +; SI-NEXT: v_writelane_b32 v41, s37, 3 +; SI-NEXT: v_writelane_b32 v41, s38, 4 +; SI-NEXT: v_writelane_b32 v41, s39, 5 +; SI-NEXT: v_writelane_b32 v41, s48, 6 +; SI-NEXT: v_writelane_b32 v41, s49, 7 +; SI-NEXT: v_writelane_b32 v41, s50, 8 +; SI-NEXT: v_writelane_b32 v41, s51, 9 +; SI-NEXT: v_writelane_b32 v41, s52, 10 +; SI-NEXT: v_writelane_b32 v41, s53, 11 +; SI-NEXT: v_writelane_b32 v41, s54, 12 +; SI-NEXT: v_writelane_b32 v41, s55, 13 +; SI-NEXT: v_writelane_b32 v41, s64, 14 +; SI-NEXT: v_writelane_b32 v41, s65, 15 +; SI-NEXT: v_writelane_b32 v41, s66, 16 +; SI-NEXT: v_writelane_b32 v41, s67, 17 +; SI-NEXT: v_writelane_b32 v41, s68, 18 +; SI-NEXT: v_writelane_b32 v41, s69, 19 +; SI-NEXT: 
v_writelane_b32 v41, s70, 20 +; SI-NEXT: v_writelane_b32 v41, s71, 21 +; SI-NEXT: v_writelane_b32 v41, s80, 22 +; SI-NEXT: v_writelane_b32 v41, s81, 23 +; SI-NEXT: v_writelane_b32 v41, s82, 24 +; SI-NEXT: v_writelane_b32 v41, s83, 25 +; SI-NEXT: v_writelane_b32 v41, s84, 26 +; SI-NEXT: v_writelane_b32 v41, s85, 27 +; SI-NEXT: v_writelane_b32 v41, s86, 28 +; SI-NEXT: v_writelane_b32 v41, s87, 29 +; SI-NEXT: v_writelane_b32 v41, s96, 30 +; SI-NEXT: v_writelane_b32 v41, s97, 31 +; SI-NEXT: v_writelane_b32 v41, s98, 32 +; SI-NEXT: v_writelane_b32 v41, s99, 33 +; SI-NEXT: v_writelane_b32 v41, s30, 34 +; SI-NEXT: v_writelane_b32 v41, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:328 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 @@ -154047,44 +155917,8 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_writelane_b32 v43, s17, 2 ; SI-NEXT: v_writelane_b32 v43, s16, 3 ; SI-NEXT: s_mov_b32 s60, s24 -; SI-NEXT: v_writelane_b32 v41, s30, 0 -; SI-NEXT: v_writelane_b32 v41, s31, 1 -; SI-NEXT: v_writelane_b32 v41, s34, 2 -; SI-NEXT: v_writelane_b32 v41, s35, 3 -; SI-NEXT: v_writelane_b32 v41, s36, 4 -; SI-NEXT: v_writelane_b32 v41, s37, 5 -; SI-NEXT: v_writelane_b32 v41, s38, 6 -; SI-NEXT: v_writelane_b32 v41, s39, 7 -; SI-NEXT: v_writelane_b32 v41, s48, 8 -; SI-NEXT: v_writelane_b32 v41, s49, 9 -; SI-NEXT: v_writelane_b32 v41, s50, 10 -; SI-NEXT: v_writelane_b32 v41, s51, 11 -; SI-NEXT: v_writelane_b32 v41, s52, 12 -; SI-NEXT: v_writelane_b32 v41, s53, 13 -; SI-NEXT: v_writelane_b32 v41, s54, 14 -; SI-NEXT: v_writelane_b32 v41, s55, 15 -; SI-NEXT: v_writelane_b32 v41, s64, 16 -; SI-NEXT: v_writelane_b32 v41, s65, 17 -; SI-NEXT: v_writelane_b32 v41, s66, 18 -; SI-NEXT: v_writelane_b32 v41, s67, 19 -; SI-NEXT: v_writelane_b32 v41, s68, 20 -; SI-NEXT: v_writelane_b32 v41, s69, 21 -; SI-NEXT: v_writelane_b32 v41, s70, 22 -; SI-NEXT: 
v_writelane_b32 v41, s71, 23 ; SI-NEXT: s_mov_b32 s77, s28 ; SI-NEXT: s_mov_b32 s76, s27 -; SI-NEXT: v_writelane_b32 v41, s80, 24 -; SI-NEXT: v_writelane_b32 v41, s81, 25 -; SI-NEXT: v_writelane_b32 v41, s82, 26 -; SI-NEXT: v_writelane_b32 v41, s83, 27 -; SI-NEXT: v_writelane_b32 v41, s84, 28 -; SI-NEXT: v_writelane_b32 v41, s85, 29 -; SI-NEXT: v_writelane_b32 v41, s86, 30 -; SI-NEXT: v_writelane_b32 v41, s87, 31 -; SI-NEXT: v_writelane_b32 v41, s96, 32 -; SI-NEXT: v_writelane_b32 v41, s97, 33 -; SI-NEXT: v_writelane_b32 v41, s98, 34 -; SI-NEXT: v_writelane_b32 v41, s99, 35 ; SI-NEXT: s_mov_b32 s79, s26 ; SI-NEXT: v_readfirstlane_b32 s38, v20 ; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane @@ -154114,6 +155948,17 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_readfirstlane_b32 s88, v4 ; SI-NEXT: v_readfirstlane_b32 s89, v3 ; SI-NEXT: v_readfirstlane_b32 s90, v9 +; SI-NEXT: v_readfirstlane_b32 s91, v10 +; SI-NEXT: v_readfirstlane_b32 s92, v8 +; SI-NEXT: v_readfirstlane_b32 s93, v7 +; SI-NEXT: v_readfirstlane_b32 s94, v13 +; SI-NEXT: v_readfirstlane_b32 s95, v14 +; SI-NEXT: v_readfirstlane_b32 s30, v17 +; SI-NEXT: v_readfirstlane_b32 s31, v18 +; SI-NEXT: v_readfirstlane_b32 s34, v16 +; SI-NEXT: v_readfirstlane_b32 s35, v15 +; SI-NEXT: v_readfirstlane_b32 s36, v21 +; SI-NEXT: v_readfirstlane_b32 s37, v22 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s6, v31 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:300 @@ -154149,17 +155994,6 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s4, v38 ; SI-NEXT: v_writelane_b32 v43, s4, 10 -; SI-NEXT: v_readfirstlane_b32 s91, v10 -; SI-NEXT: v_readfirstlane_b32 s92, v8 -; SI-NEXT: v_readfirstlane_b32 s93, v7 -; SI-NEXT: v_readfirstlane_b32 s94, v13 -; SI-NEXT: v_readfirstlane_b32 s95, v14 -; SI-NEXT: v_readfirstlane_b32 s30, v17 -; SI-NEXT: 
v_readfirstlane_b32 s31, v18 -; SI-NEXT: v_readfirstlane_b32 s34, v16 -; SI-NEXT: v_readfirstlane_b32 s35, v15 -; SI-NEXT: v_readfirstlane_b32 s36, v21 -; SI-NEXT: v_readfirstlane_b32 s37, v22 ; SI-NEXT: s_waitcnt vmcnt(11) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 11 @@ -155638,42 +157472,42 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v41, 35 -; SI-NEXT: v_readlane_b32 s98, v41, 34 -; SI-NEXT: v_readlane_b32 s97, v41, 33 -; SI-NEXT: v_readlane_b32 s96, v41, 32 -; SI-NEXT: v_readlane_b32 s87, v41, 31 -; SI-NEXT: v_readlane_b32 s86, v41, 30 -; SI-NEXT: v_readlane_b32 s85, v41, 29 -; SI-NEXT: v_readlane_b32 s84, v41, 28 -; SI-NEXT: v_readlane_b32 s83, v41, 27 -; SI-NEXT: v_readlane_b32 s82, v41, 26 -; SI-NEXT: v_readlane_b32 s81, v41, 25 -; SI-NEXT: v_readlane_b32 s80, v41, 24 -; SI-NEXT: v_readlane_b32 s71, v41, 23 -; SI-NEXT: v_readlane_b32 s70, v41, 22 -; SI-NEXT: v_readlane_b32 s69, v41, 21 -; SI-NEXT: v_readlane_b32 s68, v41, 20 -; SI-NEXT: v_readlane_b32 s67, v41, 19 -; SI-NEXT: v_readlane_b32 s66, v41, 18 -; SI-NEXT: v_readlane_b32 s65, v41, 17 -; SI-NEXT: v_readlane_b32 s64, v41, 16 -; SI-NEXT: v_readlane_b32 s55, v41, 15 -; SI-NEXT: v_readlane_b32 s54, v41, 14 -; SI-NEXT: v_readlane_b32 s53, v41, 13 -; SI-NEXT: v_readlane_b32 s52, v41, 12 -; SI-NEXT: v_readlane_b32 s51, v41, 11 -; SI-NEXT: v_readlane_b32 s50, v41, 10 -; SI-NEXT: v_readlane_b32 s49, v41, 9 -; SI-NEXT: v_readlane_b32 s48, v41, 8 -; SI-NEXT: v_readlane_b32 s39, v41, 7 -; SI-NEXT: v_readlane_b32 s38, v41, 6 -; SI-NEXT: v_readlane_b32 s37, v41, 5 -; SI-NEXT: v_readlane_b32 s36, v41, 4 -; SI-NEXT: v_readlane_b32 s35, v41, 3 -; SI-NEXT: v_readlane_b32 s34, v41, 2 -; SI-NEXT: v_readlane_b32 s31, v41, 1 -; SI-NEXT: 
v_readlane_b32 s30, v41, 0 +; SI-NEXT: v_readlane_b32 s30, v41, 34 +; SI-NEXT: v_readlane_b32 s31, v41, 35 +; SI-NEXT: v_readlane_b32 s99, v41, 33 +; SI-NEXT: v_readlane_b32 s98, v41, 32 +; SI-NEXT: v_readlane_b32 s97, v41, 31 +; SI-NEXT: v_readlane_b32 s96, v41, 30 +; SI-NEXT: v_readlane_b32 s87, v41, 29 +; SI-NEXT: v_readlane_b32 s86, v41, 28 +; SI-NEXT: v_readlane_b32 s85, v41, 27 +; SI-NEXT: v_readlane_b32 s84, v41, 26 +; SI-NEXT: v_readlane_b32 s83, v41, 25 +; SI-NEXT: v_readlane_b32 s82, v41, 24 +; SI-NEXT: v_readlane_b32 s81, v41, 23 +; SI-NEXT: v_readlane_b32 s80, v41, 22 +; SI-NEXT: v_readlane_b32 s71, v41, 21 +; SI-NEXT: v_readlane_b32 s70, v41, 20 +; SI-NEXT: v_readlane_b32 s69, v41, 19 +; SI-NEXT: v_readlane_b32 s68, v41, 18 +; SI-NEXT: v_readlane_b32 s67, v41, 17 +; SI-NEXT: v_readlane_b32 s66, v41, 16 +; SI-NEXT: v_readlane_b32 s65, v41, 15 +; SI-NEXT: v_readlane_b32 s64, v41, 14 +; SI-NEXT: v_readlane_b32 s55, v41, 13 +; SI-NEXT: v_readlane_b32 s54, v41, 12 +; SI-NEXT: v_readlane_b32 s53, v41, 11 +; SI-NEXT: v_readlane_b32 s52, v41, 10 +; SI-NEXT: v_readlane_b32 s51, v41, 9 +; SI-NEXT: v_readlane_b32 s50, v41, 8 +; SI-NEXT: v_readlane_b32 s49, v41, 7 +; SI-NEXT: v_readlane_b32 s48, v41, 6 +; SI-NEXT: v_readlane_b32 s39, v41, 5 +; SI-NEXT: v_readlane_b32 s38, v41, 4 +; SI-NEXT: v_readlane_b32 s37, v41, 3 +; SI-NEXT: v_readlane_b32 s36, v41, 2 +; SI-NEXT: v_readlane_b32 s35, v41, 1 +; SI-NEXT: v_readlane_b32 s34, v41, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload @@ -157873,35 +159707,65 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -158645,35 +160509,65 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -164693,65 +166587,123 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v57, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v92, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:124 ; GFX11-TRUE16-NEXT: s_clause 0x1b ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v127, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:12 ; GFX11-TRUE16-NEXT: s_clause 0x2 ; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:8 @@ -165860,26 +167812,47 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x15 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -167067,6 +169040,42 @@ define inreg <128 x i8> 
@bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 @@ -167088,62 +169097,26 @@ define inreg <128 x i8> 
@bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:64 ; SI-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:72 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 ; SI-NEXT: s_waitcnt expcnt(5) ; SI-NEXT: v_mul_f32_e32 v56, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v10 -; SI-NEXT: v_writelane_b32 v63, s84, 28 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v9 -; SI-NEXT: v_writelane_b32 v63, s85, 29 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v14 -; SI-NEXT: v_writelane_b32 v63, s86, 30 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill ; 
SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v15 -; SI-NEXT: v_writelane_b32 v63, s87, 31 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v18 -; SI-NEXT: v_writelane_b32 v63, s96, 32 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v26 -; SI-NEXT: v_writelane_b32 v63, s97, 33 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; SI-NEXT: v_writelane_b32 v63, s98, 34 ; SI-NEXT: v_mov_b32_e32 v46, v21 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_mul_f32_e32 v47, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v32, 1.0, v4 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v3 @@ -167153,8 +169126,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v61, 1.0, v7 ; SI-NEXT: v_mul_f32_e32 v5, 1.0, v12 ; SI-NEXT: v_mul_f32_e32 v60, 1.0, v11 -; SI-NEXT: s_waitcnt vmcnt(14) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: v_mul_f32_e32 v13, 1.0, v13 ; SI-NEXT: v_mul_f32_e32 v21, 1.0, v16 ; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17 @@ -167162,18 +169133,29 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v12, 1.0, v19 ; SI-NEXT: v_mul_f32_e32 v22, 1.0, v22 ; SI-NEXT: v_mul_f32_e32 v20, 1.0, v46 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v48 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; SI-NEXT: v_mul_f32_e32 v24, 1.0, v24 ; SI-NEXT: v_mul_f32_e32 v46, 1.0, v23 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v52 ; SI-NEXT: v_mul_f32_e32 v26, 1.0, v25 ; SI-NEXT: v_mul_f32_e32 v57, 1.0, v28 ; SI-NEXT: v_mul_f32_e32 v16, 1.0, v27 ; SI-NEXT: v_mul_f32_e32 v28, 1.0, v30 ; SI-NEXT: v_mul_f32_e32 v30, 1.0, v29 +; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16 +; SI-NEXT: 
v_mul_f32_e64 v3, 1.0, s19 +; SI-NEXT: v_mul_f32_e64 v4, 1.0, s18 +; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21 +; SI-NEXT: v_mul_f32_e64 v15, 1.0, s20 +; SI-NEXT: v_mul_f32_e64 v7, 1.0, s23 +; SI-NEXT: v_mul_f32_e64 v6, 1.0, s22 +; SI-NEXT: v_mul_f32_e64 v18, 1.0, s25 +; SI-NEXT: v_mul_f32_e64 v19, 1.0, s24 +; SI-NEXT: v_mul_f32_e64 v10, 1.0, s27 +; SI-NEXT: v_mul_f32_e64 v8, 1.0, s26 +; SI-NEXT: v_mul_f32_e64 v23, 1.0, s29 +; SI-NEXT: v_mul_f32_e64 v25, 1.0, s28 +; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane +; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: v_mul_f32_e32 v31, 1.0, v33 ; SI-NEXT: v_mul_f32_e32 v27, 1.0, v34 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec @@ -167181,8 +169163,13 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v36, 1.0, v36 ; SI-NEXT: v_mul_f32_e32 v35, 1.0, v37 ; SI-NEXT: v_mul_f32_e32 v34, 1.0, v38 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v48 +; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; SI-NEXT: v_mul_f32_e32 v37, 1.0, v39 ; SI-NEXT: v_mul_f32_e32 v48, 1.0, v49 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v52 ; SI-NEXT: v_mul_f32_e32 v39, 1.0, v50 ; SI-NEXT: v_mul_f32_e32 v33, 1.0, v51 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill @@ -167203,20 +169190,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v53, 1.0, v45 ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s17 -; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16 -; SI-NEXT: v_mul_f32_e64 v3, 1.0, s19 -; SI-NEXT: v_mul_f32_e64 v4, 1.0, s18 -; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21 -; SI-NEXT: v_mul_f32_e64 v15, 1.0, s20 -; SI-NEXT: v_mul_f32_e64 v7, 1.0, s23 -; SI-NEXT: v_mul_f32_e64 v6, 1.0, s22 -; SI-NEXT: v_mul_f32_e64 v18, 1.0, s25 -; SI-NEXT: v_mul_f32_e64 v19, 1.0, s24 -; SI-NEXT: 
v_mul_f32_e64 v10, 1.0, s27 -; SI-NEXT: v_mul_f32_e64 v8, 1.0, s26 -; SI-NEXT: v_mul_f32_e64 v23, 1.0, s29 -; SI-NEXT: v_mul_f32_e64 v25, 1.0, s28 -; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill @@ -168252,24 +170225,23 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: s_lshl_b32 s4, s4, 8 ; SI-NEXT: v_readlane_b32 s7, v62, 1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s31, v63, 1 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; 
SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 ; SI-NEXT: v_or_b32_e32 v1, s5, v1 @@ -168479,7 +170451,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s34, 8 ; SI-NEXT: s_lshl_b32 s6, s90, 24 -; SI-NEXT: v_readlane_b32 s34, v63, 2 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -168510,8 +170482,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s38, 8 ; SI-NEXT: s_lshl_b32 s6, s30, 24 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s38, v63, 4 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -168539,9 +170512,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: s_lshl_b32 s5, s52, 8 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s6, s48, 24 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s36, v63, 4 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s36, v63, 2 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 @@ -168574,9 +170547,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s68, 8 ; SI-NEXT: s_lshl_b32 s6, s54, 24 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; 
SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s50, v63, 10 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s50, v63, 8 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 @@ -168611,9 +170584,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s82, 8 ; SI-NEXT: s_lshl_b32 s6, s66, 24 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s64, v63, 16 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s64, v63, 14 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -168641,9 +170614,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s96, 8 ; SI-NEXT: s_lshl_b32 s6, s80, 24 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s70, v63, 22 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s70, v63, 20 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ -168675,8 +170648,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xff, v46 ; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s86, 24 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s84, v63, 28 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s84, v63, 26 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ 
-168708,7 +170681,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_and_b32_e32 v1, 0xff, v38 -; SI-NEXT: v_readlane_b32 s98, v63, 34 +; SI-NEXT: v_readlane_b32 s98, v63, 32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ -168748,39 +170721,53 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: 
v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: 
v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 ; VI-NEXT: v_readfirstlane_b32 s42, v5 @@ -168800,20 +170787,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, 
off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB91_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -170172,38 +172145,38 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_lshlrev_b32_e32 v36, 8, v33 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload ; VI-NEXT: v_or_b32_sdwa v17, v17, v36 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: 
v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v36, 8, v33 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload @@ -170602,43 +172575,57 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; 
GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s76, v3 ; GFX9-NEXT: 
v_readfirstlane_b32 s77, v4 ; GFX9-NEXT: v_readfirstlane_b32 s74, v5 @@ -170658,20 +172645,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -172055,42 +174028,42 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_or_b32_sdwa v2, v44, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX9-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 
src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: 
v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_or_b32_sdwa v7, v30, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v7, v7, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -172215,66 +174188,66 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s96, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-TRUE16-NEXT: 
v_writelane_b32 v40, s49, 7 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s52, 10 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s54, 12 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s64, 14 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s65, 15 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s66, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s67, 17 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s68, 18 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s69, 19 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s70, 20 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s71, 21 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s80, 22 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s81, 23 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s82, 24 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s83, 25 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s84, 26 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s85, 27 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s86, 28 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s87, 29 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s96, 30 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s97, 31 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s98, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s99, 1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s100, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s101, 3 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s102, 4 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s103, 5 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s104, 6 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s30, 7 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s31, 8 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s72, v1 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s73, v2 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s97, 1 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 
s62, v3 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s63, v4 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s60, v5 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s98, 2 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s61, v6 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s58, v7 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s59, v8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s99, 3 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s46, v9 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s47, v10 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s44, v11 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s100, 4 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s45, v12 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s42, v13 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s43, v14 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s101, 5 ; GFX11-TRUE16-NEXT: s_mov_b32 vcc_hi, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s4, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s102, 6 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s103, 7 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s104, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s49, 9 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s55, 15 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s64, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s65, 17 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s66, 18 
-; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s67, 19 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s68, 20 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s69, 21 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s70, 22 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s71, 23 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s80, 24 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s81, 25 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s82, 26 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s83, 27 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s84, 28 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s85, 29 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s86, 30 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s87, 31 ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s27, 24 @@ -173706,6 +175679,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v8, v9 +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v41, 7 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v17, v2 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v19 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v18, v1 @@ -173717,47 +175691,46 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[4:7], off offset:80 ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[11:14], off offset:96 ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[15:18], off offset:112 -; GFX11-TRUE16-NEXT: v_readlane_b32 s104, v41, 8 -; GFX11-TRUE16-NEXT: v_readlane_b32 s103, v41, 7 -; GFX11-TRUE16-NEXT: v_readlane_b32 s102, v41, 6 -; GFX11-TRUE16-NEXT: v_readlane_b32 s101, v41, 5 -; GFX11-TRUE16-NEXT: v_readlane_b32 s100, v41, 4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s99, v41, 3 -; GFX11-TRUE16-NEXT: v_readlane_b32 s98, v41, 2 -; GFX11-TRUE16-NEXT: v_readlane_b32 s97, v41, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s96, v41, 0 
-; GFX11-TRUE16-NEXT: v_readlane_b32 s87, v40, 31 -; GFX11-TRUE16-NEXT: v_readlane_b32 s86, v40, 30 -; GFX11-TRUE16-NEXT: v_readlane_b32 s85, v40, 29 -; GFX11-TRUE16-NEXT: v_readlane_b32 s84, v40, 28 -; GFX11-TRUE16-NEXT: v_readlane_b32 s83, v40, 27 -; GFX11-TRUE16-NEXT: v_readlane_b32 s82, v40, 26 -; GFX11-TRUE16-NEXT: v_readlane_b32 s81, v40, 25 -; GFX11-TRUE16-NEXT: v_readlane_b32 s80, v40, 24 -; GFX11-TRUE16-NEXT: v_readlane_b32 s71, v40, 23 -; GFX11-TRUE16-NEXT: v_readlane_b32 s70, v40, 22 -; GFX11-TRUE16-NEXT: v_readlane_b32 s69, v40, 21 -; GFX11-TRUE16-NEXT: v_readlane_b32 s68, v40, 20 -; GFX11-TRUE16-NEXT: v_readlane_b32 s67, v40, 19 -; GFX11-TRUE16-NEXT: v_readlane_b32 s66, v40, 18 -; GFX11-TRUE16-NEXT: v_readlane_b32 s65, v40, 17 -; GFX11-TRUE16-NEXT: v_readlane_b32 s64, v40, 16 -; GFX11-TRUE16-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-TRUE16-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-TRUE16-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-TRUE16-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-TRUE16-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-TRUE16-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-TRUE16-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-TRUE16-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-TRUE16-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-TRUE16-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-TRUE16-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-TRUE16-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v41, 8 +; GFX11-TRUE16-NEXT: v_readlane_b32 s104, v41, 6 +; GFX11-TRUE16-NEXT: v_readlane_b32 s103, v41, 5 +; GFX11-TRUE16-NEXT: v_readlane_b32 s102, v41, 4 +; GFX11-TRUE16-NEXT: v_readlane_b32 s101, v41, 3 +; GFX11-TRUE16-NEXT: v_readlane_b32 s100, v41, 2 +; GFX11-TRUE16-NEXT: v_readlane_b32 s99, v41, 1 +; GFX11-TRUE16-NEXT: v_readlane_b32 s98, v41, 0 +; 
GFX11-TRUE16-NEXT: v_readlane_b32 s97, v40, 31 +; GFX11-TRUE16-NEXT: v_readlane_b32 s96, v40, 30 +; GFX11-TRUE16-NEXT: v_readlane_b32 s87, v40, 29 +; GFX11-TRUE16-NEXT: v_readlane_b32 s86, v40, 28 +; GFX11-TRUE16-NEXT: v_readlane_b32 s85, v40, 27 +; GFX11-TRUE16-NEXT: v_readlane_b32 s84, v40, 26 +; GFX11-TRUE16-NEXT: v_readlane_b32 s83, v40, 25 +; GFX11-TRUE16-NEXT: v_readlane_b32 s82, v40, 24 +; GFX11-TRUE16-NEXT: v_readlane_b32 s81, v40, 23 +; GFX11-TRUE16-NEXT: v_readlane_b32 s80, v40, 22 +; GFX11-TRUE16-NEXT: v_readlane_b32 s71, v40, 21 +; GFX11-TRUE16-NEXT: v_readlane_b32 s70, v40, 20 +; GFX11-TRUE16-NEXT: v_readlane_b32 s69, v40, 19 +; GFX11-TRUE16-NEXT: v_readlane_b32 s68, v40, 18 +; GFX11-TRUE16-NEXT: v_readlane_b32 s67, v40, 17 +; GFX11-TRUE16-NEXT: v_readlane_b32 s66, v40, 16 +; GFX11-TRUE16-NEXT: v_readlane_b32 s65, v40, 15 +; GFX11-TRUE16-NEXT: v_readlane_b32 s64, v40, 14 +; GFX11-TRUE16-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-TRUE16-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-TRUE16-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-TRUE16-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-TRUE16-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-TRUE16-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-TRUE16-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-TRUE16-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-TRUE16-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-TRUE16-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-TRUE16-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-TRUE16-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-TRUE16-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-TRUE16-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 @@ -173781,66 +175754,66 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-FAKE16-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s96, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s52, 10 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s54, 12 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s64, 14 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s65, 15 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s66, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s67, 17 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s68, 18 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s69, 19 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s70, 20 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s71, 21 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s80, 22 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s81, 23 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s82, 24 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s83, 25 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s84, 26 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s85, 27 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s86, 28 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s87, 29 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s96, 30 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s97, 31 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s98, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s99, 1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s100, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s101, 3 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s102, 4 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, 
s103, 5 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s104, 6 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s30, 7 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s31, 8 ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s72, v1 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s73, v2 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s97, 1 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s62, v3 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s63, v4 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s60, v5 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s98, 2 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s61, v6 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s58, v7 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s59, v8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s99, 3 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s56, v9 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s57, v10 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s46, v11 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s100, 4 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s47, v12 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s44, v13 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s45, v14 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s101, 5 ; GFX11-FAKE16-NEXT: s_mov_b32 vcc_hi, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s4, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s102, 6 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s103, 7 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s104, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s49, 9 -; 
GFX11-FAKE16-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s55, 15 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s64, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s65, 17 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s66, 18 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s67, 19 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s68, 20 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s69, 21 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s70, 22 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s71, 23 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s80, 24 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s81, 25 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s82, 26 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s83, 27 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s84, 28 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s85, 29 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s86, 30 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s87, 31 ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false ; GFX11-FAKE16-NEXT: s_lshr_b32 s4, s27, 24 @@ -175279,6 +177252,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v3, v4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v41, 7 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v10, v2 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v11, v18 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v17, v19 @@ -175290,47 +177264,46 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:80 ; GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[13:16], off offset:96 ; GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[1:4], off 
offset:112 -; GFX11-FAKE16-NEXT: v_readlane_b32 s104, v41, 8 -; GFX11-FAKE16-NEXT: v_readlane_b32 s103, v41, 7 -; GFX11-FAKE16-NEXT: v_readlane_b32 s102, v41, 6 -; GFX11-FAKE16-NEXT: v_readlane_b32 s101, v41, 5 -; GFX11-FAKE16-NEXT: v_readlane_b32 s100, v41, 4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s99, v41, 3 -; GFX11-FAKE16-NEXT: v_readlane_b32 s98, v41, 2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s97, v41, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s96, v41, 0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s87, v40, 31 -; GFX11-FAKE16-NEXT: v_readlane_b32 s86, v40, 30 -; GFX11-FAKE16-NEXT: v_readlane_b32 s85, v40, 29 -; GFX11-FAKE16-NEXT: v_readlane_b32 s84, v40, 28 -; GFX11-FAKE16-NEXT: v_readlane_b32 s83, v40, 27 -; GFX11-FAKE16-NEXT: v_readlane_b32 s82, v40, 26 -; GFX11-FAKE16-NEXT: v_readlane_b32 s81, v40, 25 -; GFX11-FAKE16-NEXT: v_readlane_b32 s80, v40, 24 -; GFX11-FAKE16-NEXT: v_readlane_b32 s71, v40, 23 -; GFX11-FAKE16-NEXT: v_readlane_b32 s70, v40, 22 -; GFX11-FAKE16-NEXT: v_readlane_b32 s69, v40, 21 -; GFX11-FAKE16-NEXT: v_readlane_b32 s68, v40, 20 -; GFX11-FAKE16-NEXT: v_readlane_b32 s67, v40, 19 -; GFX11-FAKE16-NEXT: v_readlane_b32 s66, v40, 18 -; GFX11-FAKE16-NEXT: v_readlane_b32 s65, v40, 17 -; GFX11-FAKE16-NEXT: v_readlane_b32 s64, v40, 16 -; GFX11-FAKE16-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-FAKE16-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-FAKE16-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-FAKE16-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-FAKE16-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-FAKE16-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-FAKE16-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-FAKE16-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-FAKE16-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-FAKE16-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-FAKE16-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-FAKE16-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 
-; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v41, 8 +; GFX11-FAKE16-NEXT: v_readlane_b32 s104, v41, 6 +; GFX11-FAKE16-NEXT: v_readlane_b32 s103, v41, 5 +; GFX11-FAKE16-NEXT: v_readlane_b32 s102, v41, 4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s101, v41, 3 +; GFX11-FAKE16-NEXT: v_readlane_b32 s100, v41, 2 +; GFX11-FAKE16-NEXT: v_readlane_b32 s99, v41, 1 +; GFX11-FAKE16-NEXT: v_readlane_b32 s98, v41, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s97, v40, 31 +; GFX11-FAKE16-NEXT: v_readlane_b32 s96, v40, 30 +; GFX11-FAKE16-NEXT: v_readlane_b32 s87, v40, 29 +; GFX11-FAKE16-NEXT: v_readlane_b32 s86, v40, 28 +; GFX11-FAKE16-NEXT: v_readlane_b32 s85, v40, 27 +; GFX11-FAKE16-NEXT: v_readlane_b32 s84, v40, 26 +; GFX11-FAKE16-NEXT: v_readlane_b32 s83, v40, 25 +; GFX11-FAKE16-NEXT: v_readlane_b32 s82, v40, 24 +; GFX11-FAKE16-NEXT: v_readlane_b32 s81, v40, 23 +; GFX11-FAKE16-NEXT: v_readlane_b32 s80, v40, 22 +; GFX11-FAKE16-NEXT: v_readlane_b32 s71, v40, 21 +; GFX11-FAKE16-NEXT: v_readlane_b32 s70, v40, 20 +; GFX11-FAKE16-NEXT: v_readlane_b32 s69, v40, 19 +; GFX11-FAKE16-NEXT: v_readlane_b32 s68, v40, 18 +; GFX11-FAKE16-NEXT: v_readlane_b32 s67, v40, 17 +; GFX11-FAKE16-NEXT: v_readlane_b32 s66, v40, 16 +; GFX11-FAKE16-NEXT: v_readlane_b32 s65, v40, 15 +; GFX11-FAKE16-NEXT: v_readlane_b32 s64, v40, 14 +; GFX11-FAKE16-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-FAKE16-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-FAKE16-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-FAKE16-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-FAKE16-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-FAKE16-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-FAKE16-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-FAKE16-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-FAKE16-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-FAKE16-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-FAKE16-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-FAKE16-NEXT: v_readlane_b32 s35, v40, 1 +; 
GFX11-FAKE16-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 @@ -180815,53 +182788,99 @@ define <64 x half> @bitcast_v128i8_to_v64f16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 
offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 
offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -181753,60 +183772,71 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:384 ; 
4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: 
v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: s_mov_b32 s10, s16 -; SI-NEXT: s_waitcnt expcnt(1) ; SI-NEXT: v_writelane_b32 v61, s29, 0 ; SI-NEXT: v_writelane_b32 v61, s28, 1 ; SI-NEXT: v_writelane_b32 v61, s27, 2 ; SI-NEXT: s_mov_b32 s61, s21 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; 
SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 ; SI-NEXT: s_mov_b32 s67, s19 ; SI-NEXT: s_mov_b32 s54, s17 ; SI-NEXT: s_mov_b32 s35, s23 ; SI-NEXT: s_mov_b32 s39, s26 ; SI-NEXT: s_mov_b32 s62, s25 -; SI-NEXT: v_writelane_b32 v63, s98, 34 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s99, v1 ; SI-NEXT: v_readfirstlane_b32 s74, v24 ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s6, v23 -; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_writelane_b32 v62, s74, 0 ; SI-NEXT: v_readfirstlane_b32 s12, v26 ; SI-NEXT: v_writelane_b32 v62, s6, 1 @@ -181837,10 +183867,6 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s42, v20 ; SI-NEXT: v_readfirstlane_b32 s43, v19 ; SI-NEXT: v_readfirstlane_b32 s44, v22 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_readfirstlane_b32 s4, v31 -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:328 -; SI-NEXT: v_writelane_b32 v61, s4, 3 ; SI-NEXT: v_readfirstlane_b32 s45, v21 ; SI-NEXT: v_readfirstlane_b32 s98, v10 ; SI-NEXT: v_readfirstlane_b32 s90, v8 @@ -181848,28 +183874,19 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s91, v6 ; SI-NEXT: v_readfirstlane_b32 s93, v4 ; SI-NEXT: v_readfirstlane_b32 s55, v2 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v46, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:336 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_readfirstlane_b32 s4, v31 -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:324 -; SI-NEXT: v_writelane_b32 v61, s4, 4 +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:328 +; SI-NEXT: v_writelane_b32 v61, s4, 3 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_readfirstlane_b32 s4, v31 +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:324 +; SI-NEXT: v_writelane_b32 v61, s4, 4 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:320 ; SI-NEXT: v_writelane_b32 v61, s4, 5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -182998,7 +185015,7 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 +; SI-NEXT: v_readlane_b32 s30, v63, 34 ; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; SI-NEXT: v_or_b32_e32 v5, v6, v5 ; SI-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen @@ -183006,41 +185023,41 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:400 ; 
4-byte Folded Reload ; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: 
v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -185500,35 +187517,65 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -186272,35 +188319,65 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -187113,7 +189190,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-LABEL: bitcast_v64f16_to_v128i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill @@ -187130,6 +189206,7 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:136 ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 @@ -187158,16 +189235,16 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:92 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v33, v4 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v3 -; SI-NEXT: v_cvt_f16_f32_e32 v33, v4 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill ; SI-NEXT: v_cvt_f16_f32_e32 v31, v12 +; SI-NEXT: v_cvt_f16_f32_e32 v12, v16 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) 
; SI-NEXT: v_cvt_f16_f32_e32 v1, v6 -; SI-NEXT: v_cvt_f16_f32_e32 v12, v16 ; SI-NEXT: v_cvt_f16_f32_e32 v32, v8 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -187203,26 +189280,27 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v18 ; SI-NEXT: ; implicit-def: $vgpr18 +; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v17 -; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v10, v35 -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: v_cvt_f16_f32_e32 v8, v39 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v19 -; SI-NEXT: v_cvt_f16_f32_e32 v8, v39 ; SI-NEXT: ; implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr19 ; SI-NEXT: v_cvt_f16_f32_e32 v7, v51 +; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v22 -; SI-NEXT: v_cvt_f16_f32_e32 v56, v53 ; SI-NEXT: v_cvt_f16_f32_e32 v5, v55 +; SI-NEXT: v_cvt_f16_f32_e32 v56, v53 ; SI-NEXT: v_cvt_f16_f32_e32 v47, v54 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -187235,6 +189313,7 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: v_cvt_f16_f32_e32 v1, v23 ; SI-NEXT: v_cvt_f16_f32_e32 v62, v60 ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 +; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v60, v45 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill ; SI-NEXT: 
s_waitcnt expcnt(0) @@ -187245,7 +189324,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 ; SI-NEXT: ; implicit-def: $vgpr53 -; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr22 ; SI-NEXT: ; implicit-def: $vgpr23 @@ -187303,7 +189381,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v63 -; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v63, v46 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -191653,24 +193730,43 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -192373,6 +194469,42 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: 
v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 @@ -192398,92 +194530,68 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:64 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:72 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 +; SI-NEXT: v_mov_b32_e32 v46, v29 +; SI-NEXT: v_cvt_f16_f32_e32 v47, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v4 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 
offset:152 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v3 ; SI-NEXT: v_cvt_f16_f32_e32 v3, v7 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v9 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v26 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v25 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v30 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_mov_b32_e32 v46, v29 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_cvt_f16_f32_e32 v47, v2 -; SI-NEXT: v_cvt_f16_f32_e32 v2, v4 ; SI-NEXT: v_cvt_f16_f32_e32 v33, v6 ; SI-NEXT: v_cvt_f16_f32_e32 
v43, v5 ; SI-NEXT: v_cvt_f16_f32_e32 v5, v8 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v9 ; SI-NEXT: v_cvt_f16_f32_e32 v32, v10 ; SI-NEXT: v_cvt_f16_f32_e32 v7, v12 ; SI-NEXT: v_cvt_f16_f32_e32 v29, v11 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v26 ; SI-NEXT: v_cvt_f16_f32_e32 v31, v14 ; SI-NEXT: v_cvt_f16_f32_e32 v6, v13 ; SI-NEXT: v_cvt_f16_f32_e32 v58, v16 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v25 ; SI-NEXT: v_cvt_f16_f32_e32 v13, v15 -; SI-NEXT: s_waitcnt vmcnt(14) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: v_cvt_f16_f32_e32 v10, v18 ; SI-NEXT: v_cvt_f16_f32_e32 v11, v17 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v37 -; SI-NEXT: v_cvt_f16_f32_e32 v34, v20 +; SI-NEXT: v_cvt_f16_f32_e32 v3, v30 ; SI-NEXT: v_cvt_f16_f32_e32 v16, v19 ; SI-NEXT: v_cvt_f16_f32_e32 v9, v22 ; SI-NEXT: v_cvt_f16_f32_e32 v61, v21 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; SI-NEXT: v_cvt_f16_f32_e32 v21, v24 ; SI-NEXT: v_cvt_f16_f32_e32 v24, v23 ; SI-NEXT: v_cvt_f16_f32_e32 v44, v28 ; SI-NEXT: v_cvt_f16_f32_e32 v42, v27 ; SI-NEXT: v_cvt_f16_f32_e32 v46, v46 +; SI-NEXT: v_cvt_f16_f32_e32 v19, s17 +; SI-NEXT: v_cvt_f16_f32_e32 v18, s16 +; SI-NEXT: v_cvt_f16_f32_e32 v12, s19 +; SI-NEXT: v_cvt_f16_f32_e32 v17, s18 +; SI-NEXT: v_cvt_f16_f32_e32 v22, s22 +; SI-NEXT: v_cvt_f16_f32_e32 v15, s25 +; SI-NEXT: v_cvt_f16_f32_e32 v14, s24 +; SI-NEXT: v_cvt_f16_f32_e32 v30, s27 +; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane +; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 +; SI-NEXT: 
v_cvt_f16_f32_e32 v34, v20 ; SI-NEXT: v_cvt_f16_f32_e32 v8, v35 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v37 ; SI-NEXT: v_cvt_f16_f32_e32 v23, v36 +; SI-NEXT: v_cvt_f16_f32_e32 v27, v39 +; SI-NEXT: v_cvt_f16_f32_e32 v28, v48 ; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v3, v38 -; SI-NEXT: v_cvt_f16_f32_e32 v27, v39 -; SI-NEXT: v_cvt_f16_f32_e32 v28, v48 ; SI-NEXT: v_cvt_f16_f32_e32 v4, v49 ; SI-NEXT: v_cvt_f16_f32_e32 v45, v45 ; SI-NEXT: v_cvt_f16_f32_e32 v36, v56 @@ -192506,25 +194614,13 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v53, v40 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_cvt_f16_f32_e32 v55, v41 -; SI-NEXT: v_cvt_f16_f32_e32 v19, s17 -; SI-NEXT: v_cvt_f16_f32_e32 v18, s16 -; SI-NEXT: v_cvt_f16_f32_e32 v12, s19 -; SI-NEXT: v_cvt_f16_f32_e32 v17, s18 ; SI-NEXT: v_cvt_f16_f32_e32 v38, s21 ; SI-NEXT: v_cvt_f16_f32_e32 v37, s20 ; SI-NEXT: v_cvt_f16_f32_e32 v48, s23 -; SI-NEXT: v_cvt_f16_f32_e32 v22, s22 -; SI-NEXT: v_cvt_f16_f32_e32 v15, s25 -; SI-NEXT: v_cvt_f16_f32_e32 v14, s24 -; SI-NEXT: v_cvt_f16_f32_e32 v30, s27 ; SI-NEXT: v_cvt_f16_f32_e32 v39, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v35, s29 ; SI-NEXT: v_cvt_f16_f32_e32 v20, s28 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -194012,6 +196108,7 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:128 ; 
4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; SI-NEXT: v_readlane_b32 s30, v63, 34 ; SI-NEXT: v_readlane_b32 s45, v62, 17 ; SI-NEXT: v_readlane_b32 s43, v62, 23 ; SI-NEXT: v_readlane_b32 s41, v62, 29 @@ -194019,42 +196116,41 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: v_readlane_b32 s27, v62, 41 ; SI-NEXT: v_readlane_b32 s25, v62, 45 ; SI-NEXT: v_readlane_b32 s9, v62, 49 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, 
v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload @@ -194069,39 +196165,53 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 
-; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 
offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 ; VI-NEXT: v_readfirstlane_b32 s42, v5 @@ -194121,20 +196231,6 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 
s4, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB95_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -195016,38 +197112,38 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: v_lshlrev_b32_e32 v18, 8, v18 ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; VI-NEXT: v_or_b32_sdwa v1, v61, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, 
v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; 
VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_sdwa v58, v23, v58 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload @@ -195428,43 +197524,57 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, 
s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: 
v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v3 ; GFX9-NEXT: v_readfirstlane_b32 s45, v4 ; GFX9-NEXT: v_readfirstlane_b32 s42, v5 @@ -195484,20 +197594,6 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB95_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -196347,42 +198443,42 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: v_or_b32_sdwa v25, v25, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v30, 8, v44 ; GFX9-NEXT: v_or_b32_sdwa v26, v26, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 
s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: 
v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v15 ; GFX9-NEXT: v_or_b32_sdwa v15, v38, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -196742,86 +198838,104 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s13, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s7, v12 -; GFX11-NEXT: v_readfirstlane_b32 s4, v13 -; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 s99, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; 
GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 +; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s12, v5 +; GFX11-NEXT: v_readfirstlane_b32 s13, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s6, v11 +; GFX11-NEXT: v_readfirstlane_b32 
s7, v12 +; GFX11-NEXT: v_readfirstlane_b32 s4, v13 +; GFX11-NEXT: v_readfirstlane_b32 s5, v14 +; GFX11-NEXT: s_mov_b32 s99, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB95_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 16 @@ -197710,47 +199824,47 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 
-; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, 
v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:76 @@ -203170,53 +205284,99 @@ define <64 x i16> @bitcast_v128i8_to_v64i16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -204109,6 +206269,43 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v41, s34, 0 +; SI-NEXT: v_writelane_b32 v41, s35, 1 +; SI-NEXT: v_writelane_b32 v41, s36, 2 +; SI-NEXT: v_writelane_b32 v41, s37, 3 +; SI-NEXT: v_writelane_b32 v41, s38, 4 +; SI-NEXT: v_writelane_b32 v41, s39, 5 +; SI-NEXT: v_writelane_b32 v41, s48, 6 +; SI-NEXT: v_writelane_b32 v41, s49, 7 +; SI-NEXT: v_writelane_b32 v41, s50, 8 +; SI-NEXT: v_writelane_b32 v41, s51, 9 +; SI-NEXT: v_writelane_b32 v41, s52, 10 +; SI-NEXT: v_writelane_b32 v41, s53, 11 +; SI-NEXT: v_writelane_b32 v41, s54, 12 +; SI-NEXT: v_writelane_b32 v41, s55, 13 +; SI-NEXT: v_writelane_b32 v41, s64, 14 +; SI-NEXT: v_writelane_b32 v41, s65, 15 +; SI-NEXT: v_writelane_b32 v41, s66, 16 +; SI-NEXT: v_writelane_b32 v41, s67, 17 +; SI-NEXT: v_writelane_b32 v41, s68, 18 +; SI-NEXT: v_writelane_b32 
v41, s69, 19 +; SI-NEXT: v_writelane_b32 v41, s70, 20 +; SI-NEXT: v_writelane_b32 v41, s71, 21 +; SI-NEXT: v_writelane_b32 v41, s80, 22 +; SI-NEXT: v_writelane_b32 v41, s81, 23 +; SI-NEXT: v_writelane_b32 v41, s82, 24 +; SI-NEXT: v_writelane_b32 v41, s83, 25 +; SI-NEXT: v_writelane_b32 v41, s84, 26 +; SI-NEXT: v_writelane_b32 v41, s85, 27 +; SI-NEXT: v_writelane_b32 v41, s86, 28 +; SI-NEXT: v_writelane_b32 v41, s87, 29 +; SI-NEXT: v_writelane_b32 v41, s96, 30 +; SI-NEXT: v_writelane_b32 v41, s97, 31 +; SI-NEXT: v_writelane_b32 v41, s98, 32 +; SI-NEXT: v_writelane_b32 v41, s99, 33 +; SI-NEXT: v_writelane_b32 v41, s30, 34 +; SI-NEXT: v_writelane_b32 v41, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:328 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 @@ -204118,8 +206315,7 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:308 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:304 ; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_writelane_b32 v41, s30, 0 +; SI-NEXT: v_readfirstlane_b32 s39, v26 ; SI-NEXT: s_waitcnt expcnt(1) ; SI-NEXT: v_writelane_b32 v43, s29, 0 ; SI-NEXT: v_writelane_b32 v43, s28, 1 @@ -204135,41 +206331,6 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: v_writelane_b32 v43, s18, 11 ; SI-NEXT: v_writelane_b32 v43, s17, 12 ; SI-NEXT: v_writelane_b32 v43, s16, 13 -; SI-NEXT: v_writelane_b32 v41, s31, 1 -; SI-NEXT: v_writelane_b32 v41, s34, 2 -; SI-NEXT: v_writelane_b32 v41, s35, 3 -; SI-NEXT: v_writelane_b32 v41, s36, 4 -; SI-NEXT: v_writelane_b32 v41, s37, 5 -; SI-NEXT: v_writelane_b32 v41, s38, 6 -; SI-NEXT: v_writelane_b32 v41, s39, 7 -; SI-NEXT: v_writelane_b32 v41, s48, 8 -; SI-NEXT: v_writelane_b32 v41, s49, 9 -; SI-NEXT: v_writelane_b32 v41, s50, 10 
-; SI-NEXT: v_writelane_b32 v41, s51, 11 -; SI-NEXT: v_writelane_b32 v41, s52, 12 -; SI-NEXT: v_writelane_b32 v41, s53, 13 -; SI-NEXT: v_writelane_b32 v41, s54, 14 -; SI-NEXT: v_writelane_b32 v41, s55, 15 -; SI-NEXT: v_writelane_b32 v41, s64, 16 -; SI-NEXT: v_writelane_b32 v41, s65, 17 -; SI-NEXT: v_writelane_b32 v41, s66, 18 -; SI-NEXT: v_writelane_b32 v41, s67, 19 -; SI-NEXT: v_writelane_b32 v41, s68, 20 -; SI-NEXT: v_writelane_b32 v41, s69, 21 -; SI-NEXT: v_writelane_b32 v41, s70, 22 -; SI-NEXT: v_writelane_b32 v41, s71, 23 -; SI-NEXT: v_writelane_b32 v41, s80, 24 -; SI-NEXT: v_writelane_b32 v41, s81, 25 -; SI-NEXT: v_writelane_b32 v41, s82, 26 -; SI-NEXT: v_writelane_b32 v41, s83, 27 -; SI-NEXT: v_writelane_b32 v41, s84, 28 -; SI-NEXT: v_writelane_b32 v41, s85, 29 -; SI-NEXT: v_writelane_b32 v41, s86, 30 -; SI-NEXT: v_writelane_b32 v41, s87, 31 -; SI-NEXT: v_writelane_b32 v41, s96, 32 -; SI-NEXT: v_writelane_b32 v41, s97, 33 -; SI-NEXT: v_writelane_b32 v41, s98, 34 -; SI-NEXT: v_readfirstlane_b32 s39, v26 ; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s47, v12 ; SI-NEXT: v_writelane_b32 v42, s39, 0 @@ -204193,6 +206354,18 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s11, v1 ; SI-NEXT: v_readfirstlane_b32 s12, v2 ; SI-NEXT: v_readfirstlane_b32 s13, v9 +; SI-NEXT: v_readfirstlane_b32 s14, v10 +; SI-NEXT: v_readfirstlane_b32 s15, v8 +; SI-NEXT: v_readfirstlane_b32 s18, v7 +; SI-NEXT: v_readfirstlane_b32 s21, v5 +; SI-NEXT: v_readfirstlane_b32 s22, v6 +; SI-NEXT: v_readfirstlane_b32 s40, v17 +; SI-NEXT: v_readfirstlane_b32 s41, v18 +; SI-NEXT: v_readfirstlane_b32 s42, v4 +; SI-NEXT: v_readfirstlane_b32 s43, v3 +; SI-NEXT: v_readfirstlane_b32 s76, v16 +; SI-NEXT: v_readfirstlane_b32 s77, v15 +; SI-NEXT: v_readfirstlane_b32 s38, v25 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 14 @@ -204226,19 
+206399,6 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:256 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s6, v38 -; SI-NEXT: v_readfirstlane_b32 s14, v10 -; SI-NEXT: v_readfirstlane_b32 s15, v8 -; SI-NEXT: v_readfirstlane_b32 s18, v7 -; SI-NEXT: v_readfirstlane_b32 s21, v5 -; SI-NEXT: v_readfirstlane_b32 s22, v6 -; SI-NEXT: v_readfirstlane_b32 s40, v17 -; SI-NEXT: v_readfirstlane_b32 s41, v18 -; SI-NEXT: v_readfirstlane_b32 s42, v4 -; SI-NEXT: v_readfirstlane_b32 s43, v3 -; SI-NEXT: v_readfirstlane_b32 s76, v16 -; SI-NEXT: v_readfirstlane_b32 s77, v15 -; SI-NEXT: v_readfirstlane_b32 s38, v25 -; SI-NEXT: v_writelane_b32 v41, s99, 35 ; SI-NEXT: s_waitcnt vmcnt(11) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 19 @@ -205742,42 +207902,42 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: v_mov_b32_e32 v1, s4 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v41, 35 -; SI-NEXT: v_readlane_b32 s98, v41, 34 -; SI-NEXT: v_readlane_b32 s97, v41, 33 -; SI-NEXT: v_readlane_b32 s96, v41, 32 -; SI-NEXT: v_readlane_b32 s87, v41, 31 -; SI-NEXT: v_readlane_b32 s86, v41, 30 -; SI-NEXT: v_readlane_b32 s85, v41, 29 -; SI-NEXT: v_readlane_b32 s84, v41, 28 -; SI-NEXT: v_readlane_b32 s83, v41, 27 -; SI-NEXT: v_readlane_b32 s82, v41, 26 -; SI-NEXT: v_readlane_b32 s81, v41, 25 -; SI-NEXT: v_readlane_b32 s80, v41, 24 -; SI-NEXT: v_readlane_b32 s71, v41, 23 -; SI-NEXT: v_readlane_b32 s70, v41, 22 -; SI-NEXT: v_readlane_b32 s69, v41, 21 -; SI-NEXT: v_readlane_b32 s68, v41, 20 -; SI-NEXT: v_readlane_b32 s67, v41, 19 -; SI-NEXT: v_readlane_b32 s66, v41, 18 -; SI-NEXT: v_readlane_b32 s65, v41, 17 -; SI-NEXT: v_readlane_b32 s64, v41, 16 -; SI-NEXT: v_readlane_b32 s55, v41, 15 -; SI-NEXT: 
v_readlane_b32 s54, v41, 14 -; SI-NEXT: v_readlane_b32 s53, v41, 13 -; SI-NEXT: v_readlane_b32 s52, v41, 12 -; SI-NEXT: v_readlane_b32 s51, v41, 11 -; SI-NEXT: v_readlane_b32 s50, v41, 10 -; SI-NEXT: v_readlane_b32 s49, v41, 9 -; SI-NEXT: v_readlane_b32 s48, v41, 8 -; SI-NEXT: v_readlane_b32 s39, v41, 7 -; SI-NEXT: v_readlane_b32 s38, v41, 6 -; SI-NEXT: v_readlane_b32 s37, v41, 5 -; SI-NEXT: v_readlane_b32 s36, v41, 4 -; SI-NEXT: v_readlane_b32 s35, v41, 3 -; SI-NEXT: v_readlane_b32 s34, v41, 2 -; SI-NEXT: v_readlane_b32 s31, v41, 1 -; SI-NEXT: v_readlane_b32 s30, v41, 0 +; SI-NEXT: v_readlane_b32 s30, v41, 34 +; SI-NEXT: v_readlane_b32 s31, v41, 35 +; SI-NEXT: v_readlane_b32 s99, v41, 33 +; SI-NEXT: v_readlane_b32 s98, v41, 32 +; SI-NEXT: v_readlane_b32 s97, v41, 31 +; SI-NEXT: v_readlane_b32 s96, v41, 30 +; SI-NEXT: v_readlane_b32 s87, v41, 29 +; SI-NEXT: v_readlane_b32 s86, v41, 28 +; SI-NEXT: v_readlane_b32 s85, v41, 27 +; SI-NEXT: v_readlane_b32 s84, v41, 26 +; SI-NEXT: v_readlane_b32 s83, v41, 25 +; SI-NEXT: v_readlane_b32 s82, v41, 24 +; SI-NEXT: v_readlane_b32 s81, v41, 23 +; SI-NEXT: v_readlane_b32 s80, v41, 22 +; SI-NEXT: v_readlane_b32 s71, v41, 21 +; SI-NEXT: v_readlane_b32 s70, v41, 20 +; SI-NEXT: v_readlane_b32 s69, v41, 19 +; SI-NEXT: v_readlane_b32 s68, v41, 18 +; SI-NEXT: v_readlane_b32 s67, v41, 17 +; SI-NEXT: v_readlane_b32 s66, v41, 16 +; SI-NEXT: v_readlane_b32 s65, v41, 15 +; SI-NEXT: v_readlane_b32 s64, v41, 14 +; SI-NEXT: v_readlane_b32 s55, v41, 13 +; SI-NEXT: v_readlane_b32 s54, v41, 12 +; SI-NEXT: v_readlane_b32 s53, v41, 11 +; SI-NEXT: v_readlane_b32 s52, v41, 10 +; SI-NEXT: v_readlane_b32 s51, v41, 9 +; SI-NEXT: v_readlane_b32 s50, v41, 8 +; SI-NEXT: v_readlane_b32 s49, v41, 7 +; SI-NEXT: v_readlane_b32 s48, v41, 6 +; SI-NEXT: v_readlane_b32 s39, v41, 5 +; SI-NEXT: v_readlane_b32 s38, v41, 4 +; SI-NEXT: v_readlane_b32 s37, v41, 3 +; SI-NEXT: v_readlane_b32 s36, v41, 2 +; SI-NEXT: v_readlane_b32 s35, v41, 1 +; SI-NEXT: v_readlane_b32 
s34, v41, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload @@ -207926,35 +210086,65 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -208698,35 +210888,65 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e 
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -214238,24 +216458,43 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -214946,6 +217185,43 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, 
s[4:5] +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s96, 30 +; SI-NEXT: v_writelane_b32 v40, s97, 31 +; SI-NEXT: v_writelane_b32 v40, s98, 32 +; SI-NEXT: v_writelane_b32 v40, s99, 33 +; SI-NEXT: v_writelane_b32 v40, s30, 34 +; SI-NEXT: v_writelane_b32 v40, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68 @@ -214954,36 +217230,7 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:52 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:48 -; SI-NEXT: 
s_waitcnt expcnt(3) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 ; SI-NEXT: s_mov_b32 s88, s17 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 ; SI-NEXT: v_readfirstlane_b32 s6, v16 ; SI-NEXT: ; implicit-def: $vgpr41 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s7, v15 @@ -215009,14 +217256,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s16, v27 ; SI-NEXT: v_writelane_b32 v41, s14, 9 ; SI-NEXT: v_writelane_b32 v41, s16, 10 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 -; SI-NEXT: v_writelane_b32 v40, s86, 30 -; SI-NEXT: v_writelane_b32 v40, s87, 31 -; SI-NEXT: v_writelane_b32 v40, s96, 32 -; SI-NEXT: v_writelane_b32 v40, s97, 33 -; SI-NEXT: v_writelane_b32 v40, s98, 34 -; SI-NEXT: v_writelane_b32 v40, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s98, v30 ; SI-NEXT: v_readfirstlane_b32 s97, v26 ; SI-NEXT: v_readfirstlane_b32 s96, 
v22 @@ -215029,6 +217268,15 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s85, v10 ; SI-NEXT: v_readfirstlane_b32 s51, v9 ; SI-NEXT: v_readfirstlane_b32 s53, v8 +; SI-NEXT: v_readfirstlane_b32 s65, v7 +; SI-NEXT: v_readfirstlane_b32 s84, v6 +; SI-NEXT: v_readfirstlane_b32 s31, v5 +; SI-NEXT: v_readfirstlane_b32 s37, v4 +; SI-NEXT: v_readfirstlane_b32 s49, v3 +; SI-NEXT: v_readfirstlane_b32 s78, v2 +; SI-NEXT: v_readfirstlane_b32 s39, v1 +; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane +; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s89, v31 ; SI-NEXT: s_waitcnt vmcnt(6) @@ -215058,15 +217306,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s83, v38 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:80 -; SI-NEXT: v_readfirstlane_b32 s65, v7 -; SI-NEXT: v_readfirstlane_b32 s84, v6 -; SI-NEXT: v_readfirstlane_b32 s31, v5 -; SI-NEXT: v_readfirstlane_b32 s37, v4 -; SI-NEXT: v_readfirstlane_b32 s49, v3 -; SI-NEXT: v_readfirstlane_b32 s78, v2 -; SI-NEXT: v_readfirstlane_b32 s39, v1 -; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane -; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s77, v31 ; SI-NEXT: s_waitcnt vmcnt(11) @@ -216331,6 +218570,7 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v40, 34 ; SI-NEXT: v_readlane_b32 s21, v41, 33 ; SI-NEXT: v_readlane_b32 s19, v41, 51 ; SI-NEXT: v_readlane_b32 s17, v41, 57 @@ -216339,42 +218579,41 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: 
v_readlane_b32 s11, v43, 11 ; SI-NEXT: v_readlane_b32 s9, v43, 17 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v40, 35 -; SI-NEXT: v_readlane_b32 s98, v40, 34 -; SI-NEXT: v_readlane_b32 s97, v40, 33 -; SI-NEXT: v_readlane_b32 s96, v40, 32 -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 35 +; SI-NEXT: v_readlane_b32 s99, v40, 33 +; SI-NEXT: v_readlane_b32 s98, v40, 32 +; SI-NEXT: v_readlane_b32 s97, v40, 31 +; SI-NEXT: v_readlane_b32 s96, v40, 30 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; 
SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -216616,38 +218855,39 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: 
v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s42, v3 ; VI-NEXT: v_readfirstlane_b32 s43, v4 ; VI-NEXT: v_readfirstlane_b32 s40, v5 @@ -216667,7 +218907,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s45, v2 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB99_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -217582,39 +219821,39 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 -; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; 
VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -217785,43 +220024,57 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: 
v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, 
off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 
v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v3 ; GFX9-NEXT: v_readfirstlane_b32 s45, v4 ; GFX9-NEXT: v_readfirstlane_b32 s42, v5 @@ -217841,20 +220094,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB99_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -218703,42 +220942,42 @@ define 
inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: v_or_b32_sdwa v25, v25, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v30, 8, v44 ; GFX9-NEXT: v_or_b32_sdwa v26, v26, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 
32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v15 ; GFX9-NEXT: v_or_b32_sdwa v15, v38, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -219098,86 +221337,104 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: 
v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s13, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s7, v12 -; GFX11-NEXT: v_readfirstlane_b32 s4, v13 -; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 s99, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 +; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: 
v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s12, v5 +; GFX11-NEXT: v_readfirstlane_b32 s13, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s6, v11 +; GFX11-NEXT: v_readfirstlane_b32 s7, v12 +; GFX11-NEXT: v_readfirstlane_b32 s4, v13 +; GFX11-NEXT: v_readfirstlane_b32 s5, v14 +; GFX11-NEXT: s_mov_b32 s99, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: 
v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB99_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 16 @@ -220066,47 +222323,47 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 -; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: 
v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; 
GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:76 @@ -222667,20 +224924,35 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v48, v16 ; GFX11-TRUE16-NEXT: s_clause 0x1 @@ -225015,9 +227287,11 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_writelane_b32 v42, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_writelane_b32 v42, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_mov_b32_e32 v31, v17 ; VI-NEXT: v_mov_b32_e32 v30, v16 ; VI-NEXT: v_mov_b32_e32 v29, v15 @@ -225037,8 +227311,6 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB101_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB101_4 @@ -225643,9 +227915,9 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: .LBB101_5: ; %end ; VI-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v40, off, 
s[0:3], s32 offset:4 ; 4-byte Folded Reload +; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: v_mov_b32_e32 v18, v32 ; VI-NEXT: v_readlane_b32 s31, v42, 1 -; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -225658,9 +227930,12 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_writelane_b32 v43, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_mov_b32_e32 v31, v17 ; GFX9-NEXT: v_mov_b32_e32 v30, v16 ; GFX9-NEXT: v_mov_b32_e32 v29, v15 @@ -225680,9 +227955,6 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB101_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB101_4 @@ -226321,9 +228593,9 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: v_mov_b32_e32 v18, v32 ; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -235234,9 +237506,11 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_writelane_b32 v42, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_writelane_b32 v42, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_mov_b32_e32 v31, v17 ; VI-NEXT: v_mov_b32_e32 v30, v16 ; VI-NEXT: v_mov_b32_e32 v29, v15 @@ -235256,8 +237530,6 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB105_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB105_4 @@ -235862,9 +238134,9 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: .LBB105_5: ; %end ; VI-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: v_mov_b32_e32 v18, v32 ; VI-NEXT: v_readlane_b32 s31, v42, 
1 -; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -235877,9 +238149,12 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_writelane_b32 v43, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_mov_b32_e32 v31, v17 ; GFX9-NEXT: v_mov_b32_e32 v30, v16 ; GFX9-NEXT: v_mov_b32_e32 v29, v15 @@ -235899,9 +238174,6 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB105_4 @@ -236508,9 +238780,9 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: 
v_mov_b32_e32 v18, v32 ; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -238864,6 +241136,43 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: s_waitcnt expcnt(1) +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s96, 30 +; SI-NEXT: v_writelane_b32 v40, s97, 31 +; SI-NEXT: v_writelane_b32 v40, s98, 32 +; SI-NEXT: v_writelane_b32 v40, s99, 33 +; 
SI-NEXT: v_writelane_b32 v40, s30, 34 +; SI-NEXT: v_writelane_b32 v40, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68 @@ -238872,39 +241181,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:52 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:48 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 -; SI-NEXT: v_writelane_b32 v40, s86, 30 -; SI-NEXT: v_writelane_b32 v40, s87, 31 ; SI-NEXT: ; implicit-def: $vgpr41 : SGPR spill to VGPR lane ; SI-NEXT: s_mov_b32 s60, s16 ; SI-NEXT: s_waitcnt 
expcnt(0) @@ -238944,9 +241220,32 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: v_writelane_b32 v41, s34, 19 ; SI-NEXT: v_readfirstlane_b32 s36, v10 ; SI-NEXT: v_writelane_b32 v41, s35, 20 -; SI-NEXT: v_writelane_b32 v40, s96, 32 ; SI-NEXT: v_readfirstlane_b32 s37, v9 ; SI-NEXT: v_writelane_b32 v41, s36, 21 +; SI-NEXT: v_readfirstlane_b32 s38, v12 +; SI-NEXT: v_writelane_b32 v41, s37, 22 +; SI-NEXT: v_readfirstlane_b32 s14, v30 +; SI-NEXT: v_readfirstlane_b32 s15, v29 +; SI-NEXT: v_readfirstlane_b32 s12, v28 +; SI-NEXT: v_readfirstlane_b32 s13, v27 +; SI-NEXT: v_readfirstlane_b32 s10, v26 +; SI-NEXT: v_readfirstlane_b32 s11, v25 +; SI-NEXT: v_readfirstlane_b32 s8, v24 +; SI-NEXT: v_readfirstlane_b32 s9, v23 +; SI-NEXT: v_readfirstlane_b32 s88, v22 +; SI-NEXT: v_readfirstlane_b32 s29, v21 +; SI-NEXT: v_readfirstlane_b32 s79, v20 +; SI-NEXT: v_readfirstlane_b32 s27, v19 +; SI-NEXT: v_readfirstlane_b32 s78, v18 +; SI-NEXT: v_readfirstlane_b32 s25, v17 +; SI-NEXT: v_readfirstlane_b32 s77, v16 +; SI-NEXT: v_readfirstlane_b32 s23, v15 +; SI-NEXT: v_readfirstlane_b32 s39, v14 +; SI-NEXT: v_readfirstlane_b32 s21, v13 +; SI-NEXT: v_readfirstlane_b32 s19, v11 +; SI-NEXT: v_readfirstlane_b32 s18, v1 +; SI-NEXT: v_writelane_b32 v41, s38, 23 +; SI-NEXT: v_writelane_b32 v41, s39, 24 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s62, v31 ; SI-NEXT: s_waitcnt vmcnt(6) @@ -238975,33 +241274,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 -; SI-NEXT: v_writelane_b32 v40, s97, 33 -; SI-NEXT: v_readfirstlane_b32 s38, v12 -; SI-NEXT: v_writelane_b32 v41, s37, 22 -; SI-NEXT: v_writelane_b32 v40, s98, 34 -; SI-NEXT: v_readfirstlane_b32 s14, v30 -; SI-NEXT: v_readfirstlane_b32 s15, v29 -; SI-NEXT: v_readfirstlane_b32 
s12, v28 -; SI-NEXT: v_readfirstlane_b32 s13, v27 -; SI-NEXT: v_readfirstlane_b32 s10, v26 -; SI-NEXT: v_readfirstlane_b32 s11, v25 -; SI-NEXT: v_readfirstlane_b32 s8, v24 -; SI-NEXT: v_readfirstlane_b32 s9, v23 -; SI-NEXT: v_readfirstlane_b32 s88, v22 -; SI-NEXT: v_readfirstlane_b32 s29, v21 -; SI-NEXT: v_readfirstlane_b32 s79, v20 -; SI-NEXT: v_readfirstlane_b32 s27, v19 -; SI-NEXT: v_readfirstlane_b32 s78, v18 -; SI-NEXT: v_readfirstlane_b32 s25, v17 -; SI-NEXT: v_readfirstlane_b32 s77, v16 -; SI-NEXT: v_readfirstlane_b32 s23, v15 -; SI-NEXT: v_readfirstlane_b32 s39, v14 -; SI-NEXT: v_readfirstlane_b32 s21, v13 -; SI-NEXT: v_readfirstlane_b32 s19, v11 -; SI-NEXT: v_readfirstlane_b32 s18, v1 -; SI-NEXT: v_writelane_b32 v41, s38, 23 -; SI-NEXT: v_writelane_b32 v40, s99, 35 -; SI-NEXT: v_writelane_b32 v41, s39, 24 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s58, v31 ; SI-NEXT: s_waitcnt vmcnt(11) @@ -239696,43 +241968,43 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s5 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v40, 35 -; SI-NEXT: v_readlane_b32 s98, v40, 34 -; SI-NEXT: v_readlane_b32 s97, v40, 33 -; SI-NEXT: v_readlane_b32 s96, v40, 32 -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 
s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 35 +; SI-NEXT: v_readlane_b32 s99, v40, 33 +; SI-NEXT: v_readlane_b32 s98, v40, 32 +; SI-NEXT: v_readlane_b32 s97, v40, 31 +; SI-NEXT: v_readlane_b32 s96, v40, 30 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; 
SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -239746,14 +242018,15 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v32, s30, 0 -; VI-NEXT: v_writelane_b32 v32, s31, 1 -; VI-NEXT: v_writelane_b32 v32, s34, 2 -; VI-NEXT: v_writelane_b32 v32, s35, 3 -; VI-NEXT: v_writelane_b32 v32, s36, 4 -; VI-NEXT: v_writelane_b32 v32, s37, 5 +; VI-NEXT: v_writelane_b32 v32, s34, 0 +; VI-NEXT: v_writelane_b32 v32, s35, 1 +; VI-NEXT: v_writelane_b32 v32, s36, 2 +; VI-NEXT: v_writelane_b32 v32, s37, 3 +; VI-NEXT: v_writelane_b32 v32, s38, 4 +; VI-NEXT: v_writelane_b32 v32, s39, 5 +; VI-NEXT: v_writelane_b32 v32, s30, 6 +; VI-NEXT: v_writelane_b32 v32, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; VI-NEXT: v_writelane_b32 v32, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s47, v2 ; VI-NEXT: v_readfirstlane_b32 s46, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 @@ -239773,7 +242046,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v1 -; VI-NEXT: v_writelane_b32 v32, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB107_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB107_3 @@ -239939,6 +242211,7 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: s_add_i32 s46, s46, 0x30000 ; VI-NEXT: s_add_i32 s47, s4, 0x30000 ; VI-NEXT: .LBB107_3: ; %end +; VI-NEXT: v_readlane_b32 s30, v32, 6 ; VI-NEXT: v_mov_b32_e32 v0, s16 ; VI-NEXT: v_mov_b32_e32 v1, s17 ; VI-NEXT: 
v_mov_b32_e32 v2, s18 @@ -239971,14 +242244,13 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: v_mov_b32_e32 v29, s10 ; VI-NEXT: v_mov_b32_e32 v30, s9 ; VI-NEXT: v_mov_b32_e32 v31, s8 -; VI-NEXT: v_readlane_b32 s39, v32, 7 -; VI-NEXT: v_readlane_b32 s38, v32, 6 -; VI-NEXT: v_readlane_b32 s37, v32, 5 -; VI-NEXT: v_readlane_b32 s36, v32, 4 -; VI-NEXT: v_readlane_b32 s35, v32, 3 -; VI-NEXT: v_readlane_b32 s34, v32, 2 -; VI-NEXT: v_readlane_b32 s31, v32, 1 -; VI-NEXT: v_readlane_b32 s30, v32, 0 +; VI-NEXT: v_readlane_b32 s31, v32, 7 +; VI-NEXT: v_readlane_b32 s39, v32, 5 +; VI-NEXT: v_readlane_b32 s38, v32, 4 +; VI-NEXT: v_readlane_b32 s37, v32, 3 +; VI-NEXT: v_readlane_b32 s36, v32, 2 +; VI-NEXT: v_readlane_b32 s35, v32, 1 +; VI-NEXT: v_readlane_b32 s34, v32, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -244381,14 +246653,15 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v32, s30, 0 -; VI-NEXT: v_writelane_b32 v32, s31, 1 -; VI-NEXT: v_writelane_b32 v32, s34, 2 -; VI-NEXT: v_writelane_b32 v32, s35, 3 -; VI-NEXT: v_writelane_b32 v32, s36, 4 -; VI-NEXT: v_writelane_b32 v32, s37, 5 +; VI-NEXT: v_writelane_b32 v32, s34, 0 +; VI-NEXT: v_writelane_b32 v32, s35, 1 +; VI-NEXT: v_writelane_b32 v32, s36, 2 +; VI-NEXT: v_writelane_b32 v32, s37, 3 +; VI-NEXT: v_writelane_b32 v32, s38, 4 +; VI-NEXT: v_writelane_b32 v32, s39, 5 +; VI-NEXT: v_writelane_b32 v32, s30, 6 +; VI-NEXT: v_writelane_b32 v32, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; VI-NEXT: v_writelane_b32 v32, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s47, v2 ; VI-NEXT: v_readfirstlane_b32 s46, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 @@ -244408,7 
+246681,6 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v1 -; VI-NEXT: v_writelane_b32 v32, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB111_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB111_3 @@ -244574,6 +246846,7 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: s_add_i32 s46, s46, 0x30000 ; VI-NEXT: s_add_i32 s47, s4, 0x30000 ; VI-NEXT: .LBB111_3: ; %end +; VI-NEXT: v_readlane_b32 s30, v32, 6 ; VI-NEXT: v_mov_b32_e32 v0, s16 ; VI-NEXT: v_mov_b32_e32 v1, s17 ; VI-NEXT: v_mov_b32_e32 v2, s18 @@ -244606,14 +246879,13 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v29, s10 ; VI-NEXT: v_mov_b32_e32 v30, s9 ; VI-NEXT: v_mov_b32_e32 v31, s8 -; VI-NEXT: v_readlane_b32 s39, v32, 7 -; VI-NEXT: v_readlane_b32 s38, v32, 6 -; VI-NEXT: v_readlane_b32 s37, v32, 5 -; VI-NEXT: v_readlane_b32 s36, v32, 4 -; VI-NEXT: v_readlane_b32 s35, v32, 3 -; VI-NEXT: v_readlane_b32 s34, v32, 2 -; VI-NEXT: v_readlane_b32 s31, v32, 1 -; VI-NEXT: v_readlane_b32 s30, v32, 0 +; VI-NEXT: v_readlane_b32 s31, v32, 7 +; VI-NEXT: v_readlane_b32 s39, v32, 5 +; VI-NEXT: v_readlane_b32 s38, v32, 4 +; VI-NEXT: v_readlane_b32 s37, v32, 3 +; VI-NEXT: v_readlane_b32 s36, v32, 2 +; VI-NEXT: v_readlane_b32 s35, v32, 1 +; VI-NEXT: v_readlane_b32 s34, v32, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll index 01e397d629ea9..a48eb27460f7d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll @@ -30553,14 +30553,14 @@ define <32 x i8> @bitcast_v16i16_to_v32i8(<16 x i16> 
%a, i32 %b) { ; SI-LABEL: bitcast_v16i16_to_v32i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v48, v15 -; SI-NEXT: v_mov_b32_e32 v49, v11 -; SI-NEXT: v_mov_b32_e32 v50, v7 -; SI-NEXT: v_mov_b32_e32 v51, v3 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_mov_b32_e32 v48, v15 +; SI-NEXT: v_mov_b32_e32 v49, v11 +; SI-NEXT: v_mov_b32_e32 v50, v7 +; SI-NEXT: v_mov_b32_e32 v51, v3 ; SI-NEXT: v_mov_b32_e32 v32, v14 ; SI-NEXT: v_mov_b32_e32 v37, v12 ; SI-NEXT: v_mov_b32_e32 v33, v10 @@ -40102,11 +40102,11 @@ define inreg <32 x i8> @bitcast_v16bf16_to_v32i8_scalar(<16 x bfloat> inreg %a, ; SI-LABEL: bitcast_v16bf16_to_v32i8_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v24, 1.0, s17 ; SI-NEXT: v_mul_f32_e64 v32, 1.0, s16 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll index 9041f64cb17fb..7adaa6d3c3651 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll @@ -15733,6 +15733,10 @@ define <40 x i8> @bitcast_v20i16_to_v40i8(<20 x i16> %a, i32 %b) { ; VI-LABEL: bitcast_v20i16_to_v40i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v10 ; VI-NEXT: v_lshrrev_b32_e32 v20, 16, v9 @@ -15744,10 +15748,6 @@ define <40 x i8> @bitcast_v20i16_to_v40i8(<20 x i16> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v3 ; VI-NEXT: v_lshrrev_b32_e32 v23, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v26, 16, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr34 ; VI-NEXT: ; implicit-def: $vgpr40 ; VI-NEXT: ; implicit-def: $vgpr15 @@ -16525,18 +16525,18 @@ define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v8, s30, 0 -; SI-NEXT: v_writelane_b32 v8, s31, 1 -; SI-NEXT: v_writelane_b32 v8, s34, 2 -; SI-NEXT: v_writelane_b32 v8, s35, 3 -; SI-NEXT: v_writelane_b32 v8, s36, 4 -; SI-NEXT: v_writelane_b32 v8, s37, 5 -; SI-NEXT: v_writelane_b32 v8, s38, 6 -; SI-NEXT: v_writelane_b32 v8, s39, 7 -; SI-NEXT: v_writelane_b32 v8, s48, 8 -; SI-NEXT: v_writelane_b32 v8, s49, 9 +; SI-NEXT: v_writelane_b32 v8, s34, 0 +; SI-NEXT: v_writelane_b32 v8, s35, 1 +; SI-NEXT: v_writelane_b32 v8, s36, 2 +; SI-NEXT: v_writelane_b32 v8, s37, 3 +; SI-NEXT: v_writelane_b32 v8, s38, 4 +; SI-NEXT: v_writelane_b32 v8, s39, 5 +; SI-NEXT: 
v_writelane_b32 v8, s48, 6 +; SI-NEXT: v_writelane_b32 v8, s49, 7 +; SI-NEXT: v_writelane_b32 v8, s50, 8 +; SI-NEXT: v_writelane_b32 v8, s30, 9 +; SI-NEXT: v_writelane_b32 v8, s31, 10 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; SI-NEXT: v_writelane_b32 v8, s50, 10 ; SI-NEXT: v_readfirstlane_b32 s39, v6 ; SI-NEXT: v_readfirstlane_b32 s48, v5 ; SI-NEXT: v_readfirstlane_b32 s49, v4 @@ -16815,18 +16815,18 @@ define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 36, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v8, 9 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s50, v8, 10 -; SI-NEXT: v_readlane_b32 s49, v8, 9 -; SI-NEXT: v_readlane_b32 s48, v8, 8 -; SI-NEXT: v_readlane_b32 s39, v8, 7 -; SI-NEXT: v_readlane_b32 s38, v8, 6 -; SI-NEXT: v_readlane_b32 s37, v8, 5 -; SI-NEXT: v_readlane_b32 s36, v8, 4 -; SI-NEXT: v_readlane_b32 s35, v8, 3 -; SI-NEXT: v_readlane_b32 s34, v8, 2 -; SI-NEXT: v_readlane_b32 s31, v8, 1 -; SI-NEXT: v_readlane_b32 s30, v8, 0 +; SI-NEXT: v_readlane_b32 s31, v8, 10 +; SI-NEXT: v_readlane_b32 s50, v8, 8 +; SI-NEXT: v_readlane_b32 s49, v8, 7 +; SI-NEXT: v_readlane_b32 s48, v8, 6 +; SI-NEXT: v_readlane_b32 s39, v8, 5 +; SI-NEXT: v_readlane_b32 s38, v8, 4 +; SI-NEXT: v_readlane_b32 s37, v8, 3 +; SI-NEXT: v_readlane_b32 s36, v8, 2 +; SI-NEXT: v_readlane_b32 s35, v8, 1 +; SI-NEXT: v_readlane_b32 s34, v8, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll index ee23420c2a662..de18eec1ccc79 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll @@ -6673,8 +6673,8 @@ define inreg <16 x i32> 
@bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -6992,8 +6992,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB23_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -7007,8 +7007,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -7343,8 +7343,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB23_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -8062,8 +8062,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; SI-LABEL: 
bitcast_v16i32_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8080,6 +8078,8 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -8481,10 +8481,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v16i32_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8501,6 +8497,10 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: 
$vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -8812,10 +8812,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v16i32_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8832,6 +8828,10 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -9707,40 +9707,40 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; SI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v4, s30, 0 -; SI-NEXT: v_writelane_b32 v4, s31, 1 -; SI-NEXT: v_writelane_b32 v4, s34, 2 -; SI-NEXT: v_writelane_b32 v4, s35, 3 -; SI-NEXT: v_writelane_b32 v4, s36, 4 -; SI-NEXT: v_writelane_b32 v4, s37, 5 -; SI-NEXT: v_writelane_b32 v4, s38, 6 -; SI-NEXT: v_writelane_b32 v4, s39, 7 -; SI-NEXT: v_writelane_b32 v4, s48, 8 -; SI-NEXT: v_writelane_b32 v4, s49, 9 -; SI-NEXT: v_writelane_b32 v4, s50, 
10 -; SI-NEXT: v_writelane_b32 v4, s51, 11 -; SI-NEXT: v_writelane_b32 v4, s52, 12 -; SI-NEXT: v_writelane_b32 v4, s53, 13 -; SI-NEXT: v_writelane_b32 v4, s54, 14 -; SI-NEXT: v_writelane_b32 v4, s55, 15 -; SI-NEXT: v_writelane_b32 v4, s64, 16 -; SI-NEXT: v_writelane_b32 v4, s65, 17 -; SI-NEXT: v_writelane_b32 v4, s66, 18 -; SI-NEXT: v_writelane_b32 v4, s67, 19 -; SI-NEXT: v_writelane_b32 v4, s68, 20 -; SI-NEXT: v_writelane_b32 v4, s69, 21 -; SI-NEXT: v_writelane_b32 v4, s70, 22 -; SI-NEXT: v_writelane_b32 v4, s71, 23 -; SI-NEXT: v_writelane_b32 v4, s80, 24 -; SI-NEXT: v_writelane_b32 v4, s81, 25 -; SI-NEXT: v_writelane_b32 v4, s82, 26 -; SI-NEXT: v_writelane_b32 v4, s83, 27 +; SI-NEXT: v_writelane_b32 v4, s34, 0 +; SI-NEXT: v_writelane_b32 v4, s35, 1 +; SI-NEXT: v_writelane_b32 v4, s36, 2 +; SI-NEXT: v_writelane_b32 v4, s37, 3 +; SI-NEXT: v_writelane_b32 v4, s38, 4 +; SI-NEXT: v_writelane_b32 v4, s39, 5 +; SI-NEXT: v_writelane_b32 v4, s48, 6 +; SI-NEXT: v_writelane_b32 v4, s49, 7 +; SI-NEXT: v_writelane_b32 v4, s50, 8 +; SI-NEXT: v_writelane_b32 v4, s51, 9 +; SI-NEXT: v_writelane_b32 v4, s52, 10 +; SI-NEXT: v_writelane_b32 v4, s53, 11 +; SI-NEXT: v_writelane_b32 v4, s54, 12 +; SI-NEXT: v_writelane_b32 v4, s55, 13 +; SI-NEXT: v_writelane_b32 v4, s64, 14 +; SI-NEXT: v_writelane_b32 v4, s65, 15 +; SI-NEXT: v_writelane_b32 v4, s66, 16 +; SI-NEXT: v_writelane_b32 v4, s67, 17 +; SI-NEXT: v_writelane_b32 v4, s68, 18 +; SI-NEXT: v_writelane_b32 v4, s69, 19 +; SI-NEXT: v_writelane_b32 v4, s70, 20 +; SI-NEXT: v_writelane_b32 v4, s71, 21 +; SI-NEXT: v_writelane_b32 v4, s80, 22 +; SI-NEXT: v_writelane_b32 v4, s81, 23 +; SI-NEXT: v_writelane_b32 v4, s82, 24 +; SI-NEXT: v_writelane_b32 v4, s83, 25 +; SI-NEXT: v_writelane_b32 v4, s84, 26 +; SI-NEXT: v_writelane_b32 v4, s85, 27 +; SI-NEXT: v_writelane_b32 v4, s30, 28 +; SI-NEXT: v_writelane_b32 v4, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v4, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; 
SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v4, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB25_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 24 @@ -10061,37 +10061,37 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v4, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v4, 29 -; SI-NEXT: v_readlane_b32 s84, v4, 28 -; SI-NEXT: v_readlane_b32 s83, v4, 27 -; SI-NEXT: v_readlane_b32 s82, v4, 26 -; SI-NEXT: v_readlane_b32 s81, v4, 25 -; SI-NEXT: v_readlane_b32 s80, v4, 24 -; SI-NEXT: v_readlane_b32 s71, v4, 23 -; SI-NEXT: v_readlane_b32 s70, v4, 22 -; SI-NEXT: v_readlane_b32 s69, v4, 21 -; SI-NEXT: v_readlane_b32 s68, v4, 20 -; SI-NEXT: v_readlane_b32 s67, v4, 19 -; SI-NEXT: v_readlane_b32 s66, v4, 18 -; SI-NEXT: v_readlane_b32 s65, v4, 17 -; SI-NEXT: v_readlane_b32 s64, v4, 16 -; SI-NEXT: v_readlane_b32 s55, v4, 15 -; SI-NEXT: v_readlane_b32 s54, v4, 14 -; SI-NEXT: v_readlane_b32 s53, v4, 13 -; SI-NEXT: v_readlane_b32 s52, v4, 12 -; SI-NEXT: v_readlane_b32 s51, v4, 11 -; SI-NEXT: v_readlane_b32 s50, v4, 10 -; SI-NEXT: v_readlane_b32 s49, v4, 9 -; SI-NEXT: v_readlane_b32 s48, v4, 8 -; SI-NEXT: v_readlane_b32 s39, v4, 7 -; SI-NEXT: v_readlane_b32 s38, v4, 6 -; SI-NEXT: v_readlane_b32 s37, v4, 5 -; SI-NEXT: v_readlane_b32 s36, v4, 4 -; SI-NEXT: v_readlane_b32 s35, v4, 3 -; SI-NEXT: v_readlane_b32 s34, v4, 2 -; SI-NEXT: v_readlane_b32 s31, v4, 1 -; SI-NEXT: v_readlane_b32 s30, v4, 0 +; SI-NEXT: v_readlane_b32 s31, v4, 29 +; SI-NEXT: v_readlane_b32 s85, v4, 27 +; SI-NEXT: v_readlane_b32 s84, v4, 26 +; SI-NEXT: v_readlane_b32 s83, v4, 25 +; SI-NEXT: v_readlane_b32 s82, v4, 24 +; SI-NEXT: v_readlane_b32 s81, v4, 23 +; SI-NEXT: v_readlane_b32 s80, v4, 22 +; SI-NEXT: 
v_readlane_b32 s71, v4, 21 +; SI-NEXT: v_readlane_b32 s70, v4, 20 +; SI-NEXT: v_readlane_b32 s69, v4, 19 +; SI-NEXT: v_readlane_b32 s68, v4, 18 +; SI-NEXT: v_readlane_b32 s67, v4, 17 +; SI-NEXT: v_readlane_b32 s66, v4, 16 +; SI-NEXT: v_readlane_b32 s65, v4, 15 +; SI-NEXT: v_readlane_b32 s64, v4, 14 +; SI-NEXT: v_readlane_b32 s55, v4, 13 +; SI-NEXT: v_readlane_b32 s54, v4, 12 +; SI-NEXT: v_readlane_b32 s53, v4, 11 +; SI-NEXT: v_readlane_b32 s52, v4, 10 +; SI-NEXT: v_readlane_b32 s51, v4, 9 +; SI-NEXT: v_readlane_b32 s50, v4, 8 +; SI-NEXT: v_readlane_b32 s49, v4, 7 +; SI-NEXT: v_readlane_b32 s48, v4, 6 +; SI-NEXT: v_readlane_b32 s39, v4, 5 +; SI-NEXT: v_readlane_b32 s38, v4, 4 +; SI-NEXT: v_readlane_b32 s37, v4, 3 +; SI-NEXT: v_readlane_b32 s36, v4, 2 +; SI-NEXT: v_readlane_b32 s35, v4, 1 +; SI-NEXT: v_readlane_b32 s34, v4, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -10154,30 +10154,30 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: 
v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB25_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -10485,27 +10485,27 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: 
v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -10568,26 +10568,26 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: 
v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 +; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB25_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -10880,23 +10880,23 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; 
GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -10959,17 +10959,17 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 +; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 ; GFX11-NEXT: s_cbranch_scc0 .LBB25_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -11240,20 +11240,20 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX11-NEXT: 
s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -21564,8 +21564,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -21883,8 +21883,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB47_5: ; %end -; 
VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -21898,8 +21898,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -22234,8 +22234,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB47_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -22953,8 +22953,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v16f32_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22971,6 +22969,8 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], 
s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -23372,10 +23372,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v16f32_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -23392,6 +23388,10 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -23703,10 +23703,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v16f32_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded 
Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -23723,6 +23719,10 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -24582,40 +24582,40 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 
25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s30, 28 +; SI-NEXT: v_writelane_b32 v40, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v40, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s36, v1 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s37, v2 -; SI-NEXT: v_writelane_b32 v40, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB49_3 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s82, s37, 24 @@ -25030,37 +25030,37 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; SI-NEXT: v_or_b32_e32 v2, v3, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v40, 29 
-; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 29 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: 
v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -25073,30 +25073,6 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -25112,6 +25088,30 @@ define inreg <64 x i8> 
@bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB49_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -25440,26 +25440,26 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: 
v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; VI-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -25517,26 +25517,6 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: 
v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -25552,6 +25532,26 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; 
GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB49_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -25873,22 +25873,22 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, 
v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -25942,18 +25942,18 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB49_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -26301,21 +26301,21 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: v_or_b32_e32 v3, v11, v7 ; GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: 
v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -35963,8 +35963,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -36282,8 +36282,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB67_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -36297,8 +36297,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 
exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -36633,8 +36633,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB67_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -37352,8 +37352,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v8i64_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37370,6 +37368,8 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -37771,10 +37771,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v8i64_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37791,6 +37787,10 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -38102,10 +38102,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v8i64_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -38122,6 +38118,10 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: 
; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -39007,40 +39007,40 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; SI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v4, s30, 0 -; SI-NEXT: v_writelane_b32 v4, s31, 1 -; SI-NEXT: v_writelane_b32 v4, s34, 2 -; SI-NEXT: v_writelane_b32 v4, s35, 3 -; SI-NEXT: v_writelane_b32 v4, s36, 4 -; SI-NEXT: v_writelane_b32 v4, s37, 5 -; SI-NEXT: v_writelane_b32 v4, s38, 6 -; SI-NEXT: v_writelane_b32 v4, s39, 7 -; SI-NEXT: v_writelane_b32 v4, s48, 8 -; SI-NEXT: v_writelane_b32 v4, s49, 9 -; SI-NEXT: v_writelane_b32 v4, s50, 10 -; SI-NEXT: v_writelane_b32 v4, s51, 11 -; SI-NEXT: v_writelane_b32 v4, s52, 12 -; SI-NEXT: v_writelane_b32 v4, s53, 13 -; SI-NEXT: v_writelane_b32 v4, s54, 14 -; SI-NEXT: v_writelane_b32 v4, s55, 15 -; SI-NEXT: v_writelane_b32 v4, s64, 16 -; SI-NEXT: v_writelane_b32 v4, s65, 17 -; SI-NEXT: v_writelane_b32 v4, s66, 18 -; SI-NEXT: v_writelane_b32 v4, s67, 19 -; SI-NEXT: v_writelane_b32 v4, s68, 20 -; SI-NEXT: v_writelane_b32 v4, s69, 21 -; SI-NEXT: v_writelane_b32 v4, s70, 22 -; SI-NEXT: v_writelane_b32 v4, s71, 23 -; SI-NEXT: v_writelane_b32 v4, s80, 24 -; SI-NEXT: v_writelane_b32 v4, s81, 25 -; SI-NEXT: v_writelane_b32 v4, s82, 26 -; SI-NEXT: v_writelane_b32 v4, s83, 27 +; SI-NEXT: v_writelane_b32 v4, s34, 0 +; SI-NEXT: v_writelane_b32 v4, s35, 1 +; SI-NEXT: v_writelane_b32 v4, s36, 2 +; SI-NEXT: v_writelane_b32 v4, s37, 3 +; SI-NEXT: v_writelane_b32 v4, s38, 4 +; SI-NEXT: v_writelane_b32 v4, s39, 5 +; SI-NEXT: v_writelane_b32 v4, s48, 6 +; SI-NEXT: v_writelane_b32 v4, s49, 7 +; SI-NEXT: v_writelane_b32 v4, s50, 8 +; SI-NEXT: v_writelane_b32 v4, s51, 9 +; SI-NEXT: v_writelane_b32 v4, s52, 10 +; 
SI-NEXT: v_writelane_b32 v4, s53, 11 +; SI-NEXT: v_writelane_b32 v4, s54, 12 +; SI-NEXT: v_writelane_b32 v4, s55, 13 +; SI-NEXT: v_writelane_b32 v4, s64, 14 +; SI-NEXT: v_writelane_b32 v4, s65, 15 +; SI-NEXT: v_writelane_b32 v4, s66, 16 +; SI-NEXT: v_writelane_b32 v4, s67, 17 +; SI-NEXT: v_writelane_b32 v4, s68, 18 +; SI-NEXT: v_writelane_b32 v4, s69, 19 +; SI-NEXT: v_writelane_b32 v4, s70, 20 +; SI-NEXT: v_writelane_b32 v4, s71, 21 +; SI-NEXT: v_writelane_b32 v4, s80, 22 +; SI-NEXT: v_writelane_b32 v4, s81, 23 +; SI-NEXT: v_writelane_b32 v4, s82, 24 +; SI-NEXT: v_writelane_b32 v4, s83, 25 +; SI-NEXT: v_writelane_b32 v4, s84, 26 +; SI-NEXT: v_writelane_b32 v4, s85, 27 +; SI-NEXT: v_writelane_b32 v4, s30, 28 +; SI-NEXT: v_writelane_b32 v4, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v4, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v4, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB69_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 24 @@ -39361,37 +39361,37 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v4, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v4, 29 -; SI-NEXT: v_readlane_b32 s84, v4, 28 -; SI-NEXT: v_readlane_b32 s83, v4, 27 -; SI-NEXT: v_readlane_b32 s82, v4, 26 -; SI-NEXT: v_readlane_b32 s81, v4, 25 -; SI-NEXT: v_readlane_b32 s80, v4, 24 -; SI-NEXT: v_readlane_b32 s71, v4, 23 -; SI-NEXT: v_readlane_b32 s70, v4, 22 -; SI-NEXT: v_readlane_b32 s69, v4, 21 -; SI-NEXT: v_readlane_b32 s68, v4, 20 -; SI-NEXT: v_readlane_b32 s67, v4, 19 -; SI-NEXT: v_readlane_b32 s66, v4, 18 -; SI-NEXT: v_readlane_b32 s65, v4, 17 -; SI-NEXT: v_readlane_b32 s64, v4, 16 -; SI-NEXT: v_readlane_b32 s55, 
v4, 15 -; SI-NEXT: v_readlane_b32 s54, v4, 14 -; SI-NEXT: v_readlane_b32 s53, v4, 13 -; SI-NEXT: v_readlane_b32 s52, v4, 12 -; SI-NEXT: v_readlane_b32 s51, v4, 11 -; SI-NEXT: v_readlane_b32 s50, v4, 10 -; SI-NEXT: v_readlane_b32 s49, v4, 9 -; SI-NEXT: v_readlane_b32 s48, v4, 8 -; SI-NEXT: v_readlane_b32 s39, v4, 7 -; SI-NEXT: v_readlane_b32 s38, v4, 6 -; SI-NEXT: v_readlane_b32 s37, v4, 5 -; SI-NEXT: v_readlane_b32 s36, v4, 4 -; SI-NEXT: v_readlane_b32 s35, v4, 3 -; SI-NEXT: v_readlane_b32 s34, v4, 2 -; SI-NEXT: v_readlane_b32 s31, v4, 1 -; SI-NEXT: v_readlane_b32 s30, v4, 0 +; SI-NEXT: v_readlane_b32 s31, v4, 29 +; SI-NEXT: v_readlane_b32 s85, v4, 27 +; SI-NEXT: v_readlane_b32 s84, v4, 26 +; SI-NEXT: v_readlane_b32 s83, v4, 25 +; SI-NEXT: v_readlane_b32 s82, v4, 24 +; SI-NEXT: v_readlane_b32 s81, v4, 23 +; SI-NEXT: v_readlane_b32 s80, v4, 22 +; SI-NEXT: v_readlane_b32 s71, v4, 21 +; SI-NEXT: v_readlane_b32 s70, v4, 20 +; SI-NEXT: v_readlane_b32 s69, v4, 19 +; SI-NEXT: v_readlane_b32 s68, v4, 18 +; SI-NEXT: v_readlane_b32 s67, v4, 17 +; SI-NEXT: v_readlane_b32 s66, v4, 16 +; SI-NEXT: v_readlane_b32 s65, v4, 15 +; SI-NEXT: v_readlane_b32 s64, v4, 14 +; SI-NEXT: v_readlane_b32 s55, v4, 13 +; SI-NEXT: v_readlane_b32 s54, v4, 12 +; SI-NEXT: v_readlane_b32 s53, v4, 11 +; SI-NEXT: v_readlane_b32 s52, v4, 10 +; SI-NEXT: v_readlane_b32 s51, v4, 9 +; SI-NEXT: v_readlane_b32 s50, v4, 8 +; SI-NEXT: v_readlane_b32 s49, v4, 7 +; SI-NEXT: v_readlane_b32 s48, v4, 6 +; SI-NEXT: v_readlane_b32 s39, v4, 5 +; SI-NEXT: v_readlane_b32 s38, v4, 4 +; SI-NEXT: v_readlane_b32 s37, v4, 3 +; SI-NEXT: v_readlane_b32 s36, v4, 2 +; SI-NEXT: v_readlane_b32 s35, v4, 1 +; SI-NEXT: v_readlane_b32 s34, v4, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -39454,30 +39454,30 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 
; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB69_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -39785,27 +39785,27 @@ define inreg <64 x i8> 
@bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -39868,26 +39868,26 @@ define inreg <64 x i8> 
@bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 +; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB69_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -40180,23 +40180,23 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 
offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -40259,17 +40259,17 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 +; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: 
v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 ; GFX11-NEXT: s_cbranch_scc0 .LBB69_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -40540,20 +40540,20 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX11-NEXT: s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, 
v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -49422,8 +49422,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -49741,8 +49741,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB83_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -49756,8 +49756,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -50092,8 +50092,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB83_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: 
s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -50811,8 +50811,6 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v8f64_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -50829,6 +50827,8 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr47 @@ -51222,10 +51222,6 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v8f64_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -51242,6 +51238,10 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], 
s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -51545,10 +51545,6 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v8f64_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -51565,6 +51561,10 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -52416,42 +52416,42 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; 
SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; 
SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s30, 30 +; SI-NEXT: v_writelane_b32 v40, s31, 31 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v40, s86, 30 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v40, s87, 31 ; SI-NEXT: s_cbranch_scc0 .LBB85_3 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s48, s5, 24 @@ -52850,39 +52850,39 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v40, 30 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, 
v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 31 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -52895,30 +52895,30 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v40, s30, 0 -; VI-NEXT: v_writelane_b32 v40, s31, 1 -; 
VI-NEXT: v_writelane_b32 v40, s34, 2 -; VI-NEXT: v_writelane_b32 v40, s35, 3 -; VI-NEXT: v_writelane_b32 v40, s36, 4 -; VI-NEXT: v_writelane_b32 v40, s37, 5 -; VI-NEXT: v_writelane_b32 v40, s38, 6 -; VI-NEXT: v_writelane_b32 v40, s39, 7 -; VI-NEXT: v_writelane_b32 v40, s48, 8 -; VI-NEXT: v_writelane_b32 v40, s49, 9 -; VI-NEXT: v_writelane_b32 v40, s50, 10 -; VI-NEXT: v_writelane_b32 v40, s51, 11 -; VI-NEXT: v_writelane_b32 v40, s52, 12 -; VI-NEXT: v_writelane_b32 v40, s53, 13 -; VI-NEXT: v_writelane_b32 v40, s54, 14 -; VI-NEXT: v_writelane_b32 v40, s55, 15 -; VI-NEXT: v_writelane_b32 v40, s64, 16 -; VI-NEXT: v_writelane_b32 v40, s65, 17 +; VI-NEXT: v_writelane_b32 v40, s34, 0 +; VI-NEXT: v_writelane_b32 v40, s35, 1 +; VI-NEXT: v_writelane_b32 v40, s36, 2 +; VI-NEXT: v_writelane_b32 v40, s37, 3 +; VI-NEXT: v_writelane_b32 v40, s38, 4 +; VI-NEXT: v_writelane_b32 v40, s39, 5 +; VI-NEXT: v_writelane_b32 v40, s48, 6 +; VI-NEXT: v_writelane_b32 v40, s49, 7 +; VI-NEXT: v_writelane_b32 v40, s50, 8 +; VI-NEXT: v_writelane_b32 v40, s51, 9 +; VI-NEXT: v_writelane_b32 v40, s52, 10 +; VI-NEXT: v_writelane_b32 v40, s53, 11 +; VI-NEXT: v_writelane_b32 v40, s54, 12 +; VI-NEXT: v_writelane_b32 v40, s55, 13 +; VI-NEXT: v_writelane_b32 v40, s64, 14 +; VI-NEXT: v_writelane_b32 v40, s65, 15 +; VI-NEXT: v_writelane_b32 v40, s66, 16 +; VI-NEXT: v_writelane_b32 v40, s67, 17 +; VI-NEXT: v_writelane_b32 v40, s30, 18 +; VI-NEXT: v_writelane_b32 v40, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v40, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v40, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB85_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -53270,27 +53270,27 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; 
VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v40, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v40, 19 -; VI-NEXT: v_readlane_b32 s66, v40, 18 -; VI-NEXT: v_readlane_b32 s65, v40, 17 -; VI-NEXT: v_readlane_b32 s64, v40, 16 -; VI-NEXT: v_readlane_b32 s55, v40, 15 -; VI-NEXT: v_readlane_b32 s54, v40, 14 -; VI-NEXT: v_readlane_b32 s53, v40, 13 -; VI-NEXT: v_readlane_b32 s52, v40, 12 -; VI-NEXT: v_readlane_b32 s51, v40, 11 -; VI-NEXT: v_readlane_b32 s50, v40, 10 -; VI-NEXT: v_readlane_b32 s49, v40, 9 -; VI-NEXT: v_readlane_b32 s48, v40, 8 -; VI-NEXT: v_readlane_b32 s39, v40, 7 -; VI-NEXT: v_readlane_b32 s38, v40, 6 -; VI-NEXT: v_readlane_b32 s37, v40, 5 -; VI-NEXT: v_readlane_b32 s36, v40, 4 -; VI-NEXT: v_readlane_b32 s35, v40, 3 -; VI-NEXT: v_readlane_b32 s34, v40, 2 -; VI-NEXT: v_readlane_b32 s31, v40, 1 -; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 19 +; VI-NEXT: v_readlane_b32 s67, v40, 17 +; VI-NEXT: v_readlane_b32 s66, v40, 16 +; VI-NEXT: v_readlane_b32 s65, v40, 15 +; VI-NEXT: v_readlane_b32 s64, v40, 14 +; VI-NEXT: v_readlane_b32 s55, v40, 13 +; VI-NEXT: v_readlane_b32 s54, v40, 12 +; VI-NEXT: v_readlane_b32 s53, v40, 11 +; VI-NEXT: v_readlane_b32 s52, v40, 10 +; VI-NEXT: v_readlane_b32 s51, v40, 9 +; VI-NEXT: v_readlane_b32 s50, v40, 8 +; VI-NEXT: v_readlane_b32 s49, v40, 7 +; VI-NEXT: v_readlane_b32 s48, v40, 6 +; VI-NEXT: v_readlane_b32 s39, v40, 5 +; VI-NEXT: v_readlane_b32 s38, v40, 4 +; VI-NEXT: v_readlane_b32 s37, v40, 3 +; VI-NEXT: v_readlane_b32 s36, v40, 2 +; VI-NEXT: v_readlane_b32 s35, v40, 1 +; VI-NEXT: v_readlane_b32 s34, v40, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -53303,26 +53303,26 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s30, 14 +; GFX9-NEXT: v_writelane_b32 v40, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB85_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -53659,23 +53659,23 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 14 ; GFX9-NEXT: 
buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 15 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -53688,18 +53688,18 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v33, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v33, s30, 0 +; GFX11-NEXT: v_writelane_b32 v33, s34, 0 +; GFX11-NEXT: v_writelane_b32 v33, s35, 1 +; GFX11-NEXT: v_writelane_b32 v33, s36, 2 +; GFX11-NEXT: v_writelane_b32 v33, s37, 3 +; GFX11-NEXT: v_writelane_b32 v33, s38, 4 
+; GFX11-NEXT: v_writelane_b32 v33, s39, 5 +; GFX11-NEXT: v_writelane_b32 v33, s48, 6 +; GFX11-NEXT: v_writelane_b32 v33, s49, 7 +; GFX11-NEXT: v_writelane_b32 v33, s30, 8 +; GFX11-NEXT: v_writelane_b32 v33, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s90, 0 -; GFX11-NEXT: v_writelane_b32 v33, s31, 1 -; GFX11-NEXT: v_writelane_b32 v33, s34, 2 -; GFX11-NEXT: v_writelane_b32 v33, s35, 3 -; GFX11-NEXT: v_writelane_b32 v33, s36, 4 -; GFX11-NEXT: v_writelane_b32 v33, s37, 5 -; GFX11-NEXT: v_writelane_b32 v33, s38, 6 -; GFX11-NEXT: v_writelane_b32 v33, s39, 7 -; GFX11-NEXT: v_writelane_b32 v33, s48, 8 -; GFX11-NEXT: v_writelane_b32 v33, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB85_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -54037,21 +54037,21 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-NEXT: v_mov_b32_e32 v4, s1 +; GFX11-NEXT: v_readlane_b32 s30, v33, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off ; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v33, 9 -; GFX11-NEXT: v_readlane_b32 s48, v33, 8 -; GFX11-NEXT: v_readlane_b32 s39, v33, 7 -; GFX11-NEXT: v_readlane_b32 s38, v33, 6 -; GFX11-NEXT: v_readlane_b32 s37, v33, 5 -; GFX11-NEXT: v_readlane_b32 s36, v33, 4 -; GFX11-NEXT: v_readlane_b32 s35, v33, 3 -; GFX11-NEXT: v_readlane_b32 s34, v33, 2 -; GFX11-NEXT: v_readlane_b32 s31, v33, 1 -; GFX11-NEXT: v_readlane_b32 s30, v33, 0 +; GFX11-NEXT: v_readlane_b32 s31, v33, 9 +; GFX11-NEXT: v_readlane_b32 s49, v33, 7 +; GFX11-NEXT: v_readlane_b32 s48, v33, 6 +; GFX11-NEXT: v_readlane_b32 s39, v33, 5 +; GFX11-NEXT: v_readlane_b32 s38, v33, 4 +; GFX11-NEXT: v_readlane_b32 s37, v33, 3 +; GFX11-NEXT: v_readlane_b32 
s36, v33, 2 +; GFX11-NEXT: v_readlane_b32 s35, v33, 1 +; GFX11-NEXT: v_readlane_b32 s34, v33, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -58733,9 +58733,9 @@ define inreg <32 x half> @bitcast_v32i16_to_v32f16_scalar(<32 x i16> inreg %a, i ; SI-LABEL: bitcast_v32i16_to_v32f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: v_mov_b32_e32 v54, v17 ; SI-NEXT: v_mov_b32_e32 v53, v16 ; SI-NEXT: v_mov_b32_e32 v52, v15 @@ -61978,7 +61978,6 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; SI-LABEL: bitcast_v32bf16_to_v32i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -61995,6 +61994,7 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_waitcnt expcnt(6) ; SI-NEXT: v_mul_f32_e64 v57, 1.0, s16 @@ -62247,8 +62247,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: 
s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v20, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -62566,8 +62566,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB95_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -62581,8 +62581,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -62901,8 +62901,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB95_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -64359,8 +64359,24 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; ; VI-LABEL: bitcast_v32i16_to_v64i8: ; VI: ; %bb.0: -; VI-NEXT: ; implicit-def: $vgpr19 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: ; implicit-def: $vgpr19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr19 @@ -64381,22 +64397,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr19 ; VI-NEXT: ; implicit-def: $vgpr19 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 
4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; kill: killed $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr17 @@ -64829,10 +64829,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v32i16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -64849,6 +64845,10 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -65725,43 +65725,43 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; 
SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: s_mov_b32 s93, s18 ; SI-NEXT: s_mov_b32 s31, s17 ; SI-NEXT: v_readfirstlane_b32 s59, v18 @@ -66280,45 +66280,45 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v21, 11 ; 
SI-NEXT: v_readlane_b32 s17, v21, 17 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 
23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -66409,30 +66409,30 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; 
VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB97_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -66804,27 +66804,27 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; 
VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -66887,26 +66887,6 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 
-; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -66922,6 +66902,26 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB97_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -67243,22 +67243,22 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -67312,18 +67312,18 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; 
GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB97_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -67671,21 +67671,21 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: v_or_b32_e32 v3, v11, v7 ; GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: 
v_readlane_b32 s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -74579,7 +74579,6 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; SI-LABEL: bitcast_v32bf16_to_v32f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -74596,6 +74595,7 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v32, 1.0, s16 ; SI-NEXT: v_mul_f32_e64 v33, 1.0, s17 @@ -74893,8 +74893,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v20, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 
s31, v1 @@ -75212,8 +75212,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB103_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -75227,8 +75227,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -75563,8 +75563,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB103_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -77045,6 +77045,22 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; VI-LABEL: bitcast_v32f16_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded 
Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr17 @@ -77067,22 +77083,6 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v53, 16, v1 ; VI-NEXT: ; kill: killed $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr22 ; VI-NEXT: ; implicit-def: $vgpr24 ; VI-NEXT: ; implicit-def: $vgpr55 @@ -77397,10 +77397,6 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v32f16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -77417,6 +77413,10 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: 
$vgpr23 @@ -78293,8 +78293,12 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s30, 4 +; SI-NEXT: v_writelane_b32 v40, s31, 5 ; SI-NEXT: v_cvt_f16_f32_e32 v21, s17 ; SI-NEXT: v_cvt_f16_f32_e32 v20, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v22, v1 @@ -78327,12 +78331,8 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: v_cvt_f16_f32_e32 v13, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v17, s29 ; SI-NEXT: v_cvt_f16_f32_e32 v16, s28 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v40, s36, 4 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_writelane_b32 v40, s37, 5 ; SI-NEXT: s_cbranch_scc0 .LBB105_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_readfirstlane_b32 s4, v21 @@ -78833,13 +78833,13 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: v_or_b32_e32 v1, v2, v1 ; SI-NEXT: v_or_b32_e32 v1, s4, v1 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 4 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 5 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; 
SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -78902,30 +78902,6 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -78941,6 +78917,30 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: 
v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB105_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s75, s5, 24 @@ -79320,26 +79320,26 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v54 ; VI-NEXT: v_or_b32_sdwa v1, v49, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: 
v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-NEXT: v_or_b32_sdwa v2, v25, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -79399,26 +79399,6 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 
v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -79434,6 +79414,26 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -79756,22 +79756,22 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -79825,18 +79825,18 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -80184,21 +80184,21 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: v_or_b32_e32 v3, v11, v7 ; GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 
s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -82605,17 +82605,17 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_writelane_b32 v32, s34, 0 +; SI-NEXT: v_writelane_b32 v32, s35, 1 +; SI-NEXT: v_writelane_b32 v32, s36, 2 +; SI-NEXT: v_writelane_b32 v32, s37, 3 +; SI-NEXT: v_writelane_b32 v32, s38, 4 +; SI-NEXT: v_writelane_b32 v32, s39, 5 +; SI-NEXT: v_writelane_b32 v32, s30, 6 +; SI-NEXT: v_writelane_b32 v32, s31, 7 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 ; SI-NEXT: v_readfirstlane_b32 s46, v20 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v32, s30, 0 -; SI-NEXT: v_writelane_b32 v32, s31, 1 -; SI-NEXT: v_writelane_b32 v32, s34, 2 -; SI-NEXT: v_writelane_b32 v32, s35, 3 -; SI-NEXT: v_writelane_b32 v32, s36, 4 -; SI-NEXT: v_writelane_b32 v32, s37, 5 -; SI-NEXT: v_writelane_b32 v32, s38, 6 -; SI-NEXT: v_writelane_b32 v32, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s74, v30 ; SI-NEXT: v_readfirstlane_b32 s61, v29 ; SI-NEXT: v_readfirstlane_b32 s63, v28 @@ -83031,14 +83031,14 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32 ; SI-NEXT: v_cvt_f32_f16_e32 v30, s5 ; SI-NEXT: v_cvt_f32_f16_e32 v31, s4 ; SI-NEXT: .LBB107_3: ; %end -; SI-NEXT: v_readlane_b32 s39, v32, 7 -; SI-NEXT: v_readlane_b32 s38, v32, 6 -; SI-NEXT: v_readlane_b32 
s37, v32, 5 -; SI-NEXT: v_readlane_b32 s36, v32, 4 -; SI-NEXT: v_readlane_b32 s35, v32, 3 -; SI-NEXT: v_readlane_b32 s34, v32, 2 -; SI-NEXT: v_readlane_b32 s31, v32, 1 -; SI-NEXT: v_readlane_b32 s30, v32, 0 +; SI-NEXT: v_readlane_b32 s30, v32, 6 +; SI-NEXT: v_readlane_b32 s31, v32, 7 +; SI-NEXT: v_readlane_b32 s39, v32, 5 +; SI-NEXT: v_readlane_b32 s38, v32, 4 +; SI-NEXT: v_readlane_b32 s37, v32, 3 +; SI-NEXT: v_readlane_b32 s36, v32, 2 +; SI-NEXT: v_readlane_b32 s35, v32, 1 +; SI-NEXT: v_readlane_b32 s34, v32, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -85271,10 +85271,6 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; VI-LABEL: bitcast_v32bf16_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -85291,6 +85287,10 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -85875,12 +85875,6 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; GFX9-LABEL: 
bitcast_v32bf16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -85897,6 +85891,12 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr28 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -87562,7 +87562,6 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; SI-LABEL: bitcast_v32bf16_to_v64i8_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -87579,6 +87578,7 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, 
off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v19, 1.0, s17 ; SI-NEXT: v_mul_f32_e32 v33, 1.0, v2 @@ -88167,30 +88167,6 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -88206,6 +88182,30 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], 
s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB109_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -88807,26 +88807,26 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v34 ; VI-NEXT: v_or_b32_sdwa v1, v13, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; 
VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-NEXT: v_or_b32_sdwa v2, v33, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -88884,26 +88884,26 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: 
v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 +; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB109_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s92, s5, 24 @@ -89541,23 +89541,23 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 
3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -89620,19 +89620,19 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 +; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s50, 7 +; GFX11-NEXT: v_writelane_b32 v17, s51, 8 +; GFX11-NEXT: v_writelane_b32 v17, s30, 9 +; GFX11-NEXT: v_writelane_b32 v17, s31, 10 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: 
v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s50, 9 -; GFX11-NEXT: v_writelane_b32 v17, s51, 10 ; GFX11-NEXT: s_cbranch_scc0 .LBB109_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s62, s27, 24 @@ -90280,22 +90280,22 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX11-NEXT: s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 9 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s51, v17, 10 -; GFX11-NEXT: v_readlane_b32 s50, v17, 9 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 10 +; GFX11-NEXT: v_readlane_b32 s51, v17, 8 +; GFX11-NEXT: v_readlane_b32 s50, v17, 7 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll index 
5d4df4bde1af8..07c574944ad4e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll @@ -656,36 +656,36 @@ define inreg <18 x i32> @bitcast_v18f32_to_v18i32_scalar(<18 x float> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 
v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB3_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -2075,36 +2075,36 @@ define inreg <18 x i32> @bitcast_v9f64_to_v18i32_scalar(<9 x double> inreg %a, i ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; 
GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB11_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -3806,7 +3806,6 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v18i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -3823,6 +3822,7 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4121,13 +4121,13 @@ define inreg <18 x i32> @bitcast_v36i16_to_v18i32_scalar(<36 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v36i16_to_v18i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -4710,7 +4710,6 @@ define <36 x half> @bitcast_v18i32_to_v36f16(<18 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v18i32_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -4718,6 +4717,7 @@ define <36 x half> @bitcast_v18i32_to_v36f16(<18 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -6669,7 +6669,6 @@ define <18 x i32> @bitcast_v36f16_to_v18i32(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v18i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6686,6 +6685,7 @@ define <18 x i32> @bitcast_v36f16_to_v18i32(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], 
s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -7970,36 +7970,36 @@ define inreg <9 x i64> @bitcast_v18f32_to_v9i64_scalar(<18 x float> inreg %a, i3 ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, 
s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB21_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -8731,36 +8731,36 @@ define inreg <9 x double> @bitcast_v18f32_to_v9f64_scalar(<18 x float> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; 
GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB25_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -9077,36 +9077,36 @@ define inreg <18 x float> @bitcast_v9f64_to_v18f32_scalar(<9 x double> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: 
v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB27_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -10939,7 +10939,6 @@ define <18 x float> @bitcast_v36i16_to_v18f32(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v18f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -10956,6 +10955,7 @@ define <18 x float> @bitcast_v36i16_to_v18f32(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -11254,13 +11254,13 @@ define inreg <18 x float> @bitcast_v36i16_to_v18f32_scalar(<36 x i16> inreg %a, ; SI-LABEL: bitcast_v36i16_to_v18f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; 
SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -11843,7 +11843,6 @@ define <36 x half> @bitcast_v18f32_to_v36f16(<18 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v18f32_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -11851,6 +11850,7 @@ define <36 x half> @bitcast_v18f32_to_v36f16(<18 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -13940,7 +13940,6 @@ define <18 x float> @bitcast_v36f16_to_v18f32(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v18f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -13957,6 +13956,7 @@ define <18 x float> @bitcast_v36f16_to_v18f32(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, 
s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -15547,36 +15547,36 @@ define inreg <9 x i64> @bitcast_v9f64_to_v9i64_scalar(<9 x double> inreg %a, i32 ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; 
GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB39_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -17288,7 +17288,6 @@ define <9 x i64> @bitcast_v36i16_to_v9i64(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v9i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17305,6 +17304,7 @@ define <9 x i64> @bitcast_v36i16_to_v9i64(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -17603,13 +17603,13 @@ define inreg <9 x i64> @bitcast_v36i16_to_v9i64_scalar(<36 x i16> inreg %a, i32 ; SI-LABEL: bitcast_v36i16_to_v9i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], 
s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -18192,7 +18192,6 @@ define <36 x half> @bitcast_v9i64_to_v36f16(<9 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v9i64_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -18200,6 +18199,7 @@ define <36 x half> @bitcast_v9i64_to_v36f16(<9 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -20161,7 +20161,6 @@ define <9 x i64> @bitcast_v36f16_to_v9i64(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v9i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -20178,6 +20177,7 @@ define <9 x i64> @bitcast_v36f16_to_v9i64(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: 
buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -22864,7 +22864,6 @@ define <9 x double> @bitcast_v36i16_to_v9f64(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v9f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22881,6 +22880,7 @@ define <9 x double> @bitcast_v36i16_to_v9f64(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -23179,13 +23179,13 @@ define inreg <9 x double> @bitcast_v36i16_to_v9f64_scalar(<36 x i16> inreg %a, i ; SI-LABEL: bitcast_v36i16_to_v9f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, 
off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -23768,7 +23768,6 @@ define <36 x half> @bitcast_v9f64_to_v36f16(<9 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v9f64_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -23776,6 +23775,7 @@ define <36 x half> @bitcast_v9f64_to_v36f16(<9 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -25787,7 +25787,6 @@ define <9 x double> @bitcast_v36f16_to_v9f64(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v9f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25804,6 +25803,7 @@ define <9 x double> @bitcast_v36f16_to_v9f64(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; 
GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -27586,8 +27586,6 @@ define inreg <36 x half> @bitcast_v36i16_to_v36f16_scalar(<36 x i16> inreg %a, i ; SI-LABEL: bitcast_v36i16_to_v36f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill @@ -27599,6 +27597,8 @@ define inreg <36 x half> @bitcast_v36i16_to_v36f16_scalar(<36 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB57_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_waitcnt expcnt(4) diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll index 44cfd6c28ca6a..1648368af460a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll @@ -4019,7 +4019,6 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v20i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -4036,6 
+4035,7 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4376,7 +4376,6 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v40i16_to_v20i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -4387,6 +4386,7 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -4913,85 +4913,157 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -5289,7 +5361,6 @@ define <40 x half> @bitcast_v20i32_to_v40f16(<20 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v20i32_to_v40f16: ; SI: ; %bb.0: ; 
SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -5303,6 +5374,7 @@ define <40 x half> @bitcast_v20i32_to_v40f16(<20 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -7495,7 +7567,6 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v20i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -7512,6 +7583,7 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -8519,85 +8591,157 @@ define inreg <20 x i32> @bitcast_v40f16_to_v20i32_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 
offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; 
GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -12218,7 +12362,6 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v20f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -12235,6 +12378,7 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -12575,7 +12719,6 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; SI-LABEL: bitcast_v40i16_to_v20f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -12586,6 +12729,7 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -13112,85 +13256,157 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -13488,7 +13704,6 @@ define <40 x half> @bitcast_v20f32_to_v40f16(<20 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v20f32_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -13502,6 +13717,7 @@ define <40 x half> @bitcast_v20f32_to_v40f16(<20 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -14276,6 +14492,9 @@ define inreg <40 x half> @bitcast_v20f32_to_v40f16_scalar(<20 x float> inreg %a, ; SI-LABEL: bitcast_v20f32_to_v40f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; SI-NEXT: v_readfirstlane_b32 s11, v1 ; SI-NEXT: v_readfirstlane_b32 s10, v2 @@ -14284,9 +14503,6 @@ define inreg <40 x half> @bitcast_v20f32_to_v40f16_scalar(<20 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v5 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v6 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -15808,7 +16024,6 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v20f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -15825,6 +16040,7 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -16832,85 +17048,157 @@ define inreg <20 x float> @bitcast_v40f16_to_v20f32_scalar(<40 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -19727,7 +20015,6 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v10i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -19744,6 +20031,7 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -20084,7 +20372,6 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v40i16_to_v10i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -20095,6 +20382,7 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded 
Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -20621,85 +20909,157 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 
offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, 
s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -20997,7 +21357,6 @@ define <40 x half> @bitcast_v10i64_to_v40f16(<10 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v10i64_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -21011,6 +21370,7 @@ define <40 x half> @bitcast_v10i64_to_v40f16(<10 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -23213,7 +23573,6 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v10i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded 
Spill @@ -23230,6 +23589,7 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -24237,85 +24597,157 @@ define inreg <10 x i64> @bitcast_v40f16_to_v10i64_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -26466,7 +26898,6 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v10f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -26483,6 +26914,7 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -26823,7 +27255,6 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> 
inreg %a, ; SI-LABEL: bitcast_v40i16_to_v10f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -26834,6 +27265,7 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -27360,85 +27792,157 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -27736,7 +28240,6 @@ define <40 x half> @bitcast_v10f64_to_v40f16(<10 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v10f64_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -27750,6 +28253,7 @@ define <40 x half> @bitcast_v10f64_to_v40f16(<10 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -28484,6 +28988,10 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a ; SI-LABEL: bitcast_v10f64_to_v40f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; SI-NEXT: v_readfirstlane_b32 s8, v1 ; SI-NEXT: v_readfirstlane_b32 s9, v2 @@ -28492,10 +29000,6 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v5 ; SI-NEXT: s_and_b64 s[10:11], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v6 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s10, s5, 16 @@ -29989,7 +30493,6 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v10f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 
; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -30006,6 +30509,7 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -31013,85 +31517,157 @@ define inreg <10 x double> @bitcast_v40f16_to_v10f64_scalar(<40 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -32303,8 +32879,6 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; SI-LABEL: bitcast_v40i16_to_v40f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -32321,6 +32895,8 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: 
s_cbranch_scc0 .LBB57_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_cvt_f32_f16_e32 v30, v15 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll index 87d5157b3c340..010c7f18fa513 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll @@ -4340,7 +4340,6 @@ define <22 x i32> @bitcast_v44i16_to_v22i32(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v22i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -4357,6 +4356,7 @@ define <22 x i32> @bitcast_v44i16_to_v22i32(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4739,7 +4739,6 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v44i16_to_v22i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -4754,6 +4753,7 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v59, off, 
s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -5328,87 +5328,161 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 
offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 
offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 
offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -5722,10 +5796,6 @@ define <44 x half> @bitcast_v22i32_to_v44f16(<22 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v22i32_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -5742,6 +5812,10 @@ define <44 x half> @bitcast_v22i32_to_v44f16(<22 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; 
implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -8182,7 +8256,6 @@ define <22 x i32> @bitcast_v44f16_to_v22i32(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v22i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8199,6 +8272,7 @@ define <22 x i32> @bitcast_v44f16_to_v22i32(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -9310,87 +9384,161 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -13303,7 +13451,6 @@ define <22 x float> @bitcast_v44i16_to_v22f32(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v22f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -13320,6 +13467,7 @@ define <22 x float> @bitcast_v44i16_to_v22f32(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -13702,7 +13850,6 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; SI-LABEL: bitcast_v44i16_to_v22f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -13717,6 +13864,7 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -14291,87 +14439,161 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -14685,10 +14907,6 @@ define <44 x half> @bitcast_v22f32_to_v44f16(<22 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v22f32_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -14705,6 +14923,10 @@ define <44 x half> @bitcast_v22f32_to_v44f16(<22 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -15560,6 +15782,14 @@ define inreg <44 x half> @bitcast_v22f32_to_v44f16_scalar(<22 x float> inreg %a, ; SI-LABEL: bitcast_v22f32_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; SI-NEXT: v_readfirstlane_b32 s13, v1 ; SI-NEXT: v_readfirstlane_b32 s12, v2 @@ -15570,14 +15800,6 @@ define inreg <44 x half> @bitcast_v22f32_to_v44f16_scalar(<22 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v7 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v8 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -17278,7 +17500,6 @@ define <22 x float> @bitcast_v44f16_to_v22f32(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v22f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte 
Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17295,6 +17516,7 @@ define <22 x float> @bitcast_v44f16_to_v22f32(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -18406,87 +18628,161 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -21552,7 +21848,6 @@ define <11 x i64> @bitcast_v44i16_to_v11i64(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v11i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -21569,6 +21864,7 @@ define <11 x i64> @bitcast_v44i16_to_v11i64(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; 
GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -21951,7 +22247,6 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v44i16_to_v11i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -21966,6 +22261,7 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -22540,87 +22836,161 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -22934,10 +23304,6 @@ define <44 x half> @bitcast_v11i64_to_v44f16(<11 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v11i64_to_v44f16: ; SI: 
; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22954,6 +23320,10 @@ define <44 x half> @bitcast_v11i64_to_v44f16(<11 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -25406,7 +25776,6 @@ define <11 x i64> @bitcast_v44f16_to_v11i64(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v11i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25423,6 +25792,7 @@ define <11 x i64> @bitcast_v44f16_to_v11i64(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; 
GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -26534,87 +26904,161 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, 
s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -28968,7 +29412,6 @@ define <11 x double> @bitcast_v44i16_to_v11f64(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v11f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -28985,6 +29428,7 @@ define <11 x double> @bitcast_v44i16_to_v11f64(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -29367,7 +29811,6 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; SI-LABEL: bitcast_v44i16_to_v11f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -29382,6 +29825,7 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -29956,87 +30400,161 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -30350,10 +30868,6 @@ define <44 x half> @bitcast_v11f64_to_v44f16(<11 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v11f64_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -30370,6 +30884,10 @@ define <44 x half> 
@bitcast_v11f64_to_v44f16(<11 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -31181,6 +31699,15 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a ; SI-LABEL: bitcast_v11f64_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; SI-NEXT: v_readfirstlane_b32 s10, v1 ; SI-NEXT: v_readfirstlane_b32 s11, v2 @@ -31191,15 +31718,6 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v7 ; SI-NEXT: s_and_b64 s[12:13], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v8 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 
offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s12, s5, 16 @@ -32867,7 +33385,6 @@ define <11 x double> @bitcast_v44f16_to_v11f64(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v11f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -32884,6 +33401,7 @@ define <11 x double> @bitcast_v44f16_to_v11f64(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -33995,87 +34513,161 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, 
s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, 
s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -35429,7 +36021,6 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; SI-LABEL: bitcast_v44i16_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill @@ -35446,7 +36037,8 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB57_4 @@ -37436,7 +38028,6 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-LABEL: bitcast_v44f16_to_v44i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill @@ -37453,6 +38044,7 @@ 
define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: s_waitcnt expcnt(6) ; SI-NEXT: v_cvt_f16_f32_e32 v57, v2 ; SI-NEXT: s_waitcnt expcnt(5) @@ -37500,7 +38092,7 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v38, s25 ; SI-NEXT: v_cvt_f16_f32_e32 v16, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v29, s29 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll index fb2e94fc3b87a..3fbedf74d9e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll @@ -2440,8 +2440,8 @@ define <48 x i16> @bitcast_v24i32_to_v48i16(<24 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v24i32_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr51 @@ -3193,10 +3193,11 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v12, s30, 0 -; SI-NEXT: v_writelane_b32 v12, s31, 1 +; SI-NEXT: v_writelane_b32 v12, s34, 0 +; SI-NEXT: v_writelane_b32 v12, 
s35, 1 +; SI-NEXT: v_writelane_b32 v12, s30, 2 +; SI-NEXT: v_writelane_b32 v12, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_writelane_b32 v12, s34, 2 ; SI-NEXT: v_readfirstlane_b32 s12, v1 ; SI-NEXT: v_readfirstlane_b32 s13, v2 ; SI-NEXT: v_readfirstlane_b32 s10, v3 @@ -3208,7 +3209,6 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v9 ; SI-NEXT: s_and_b64 s[14:15], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v10 -; SI-NEXT: v_writelane_b32 v12, s35, 3 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s88, s5, 16 @@ -3449,11 +3449,11 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x5c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v12, 2 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s35, v12, 3 -; SI-NEXT: v_readlane_b32 s34, v12, 2 -; SI-NEXT: v_readlane_b32 s31, v12, 1 -; SI-NEXT: v_readlane_b32 s30, v12, 0 +; SI-NEXT: v_readlane_b32 s31, v12, 3 +; SI-NEXT: v_readlane_b32 s35, v12, 1 +; SI-NEXT: v_readlane_b32 s34, v12, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -5655,6 +5655,10 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v48i16_to_v24i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; 
GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -5680,10 +5684,6 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -5805,89 +5805,165 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 
offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 
offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -6216,16 +6292,7 @@ end: define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v24i32_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6246,6 +6313,11 @@ define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -6290,6 +6362,10 @@ define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -7211,6 +7287,7 @@ define inreg <48 x half> @bitcast_v24i32_to_v48f16_scalar(<24 x i32> inreg %a, i ; SI-LABEL: bitcast_v24i32_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_readfirstlane_b32 s15, v1 ; SI-NEXT: v_readfirstlane_b32 s14, v2 @@ -7223,7 +7300,6 @@ define inreg <48 x half> @bitcast_v24i32_to_v48f16_scalar(<24 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v9 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v10 -; 
SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -10061,6 +10137,10 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX9-LABEL: bitcast_v48f16_to_v24i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -10086,10 +10166,6 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -10213,89 +10289,165 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -12185,8 +12337,8 @@ define <48 x i16> @bitcast_v24f32_to_v48i16(<24 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v24f32_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr51 @@ -12910,6 +13062,9 @@ define inreg <48 x i16> @bitcast_v24f32_to_v48i16_scalar(<24 x float> inreg %a, ; SI-LABEL: bitcast_v24f32_to_v48i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_mov_b32_e32 v23, s16 ; SI-NEXT: v_mov_b32_e32 v24, s17 @@ -12926,9 +13081,6 @@ define inreg <48 x i16> @bitcast_v24f32_to_v48i16_scalar(<24 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v11, s28 ; SI-NEXT: v_mov_b32_e32 v12, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[25:26], v[9:10], 16 
@@ -15492,6 +15644,10 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX9-LABEL: bitcast_v48i16_to_v24f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -15517,10 +15673,6 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -15642,89 +15794,165 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -16053,16 +16281,7 @@ end: define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v24f32_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -16083,6 +16302,11 @@ define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -16127,6 +16351,10 @@ define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -17024,18 +17252,6 @@ define inreg <48 x half> 
@bitcast_v24f32_to_v48f16_scalar(<24 x float> inreg %a, ; SI-LABEL: bitcast_v24f32_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_readfirstlane_b32 s15, v1 -; SI-NEXT: v_readfirstlane_b32 s14, v2 -; SI-NEXT: v_readfirstlane_b32 s13, v3 -; SI-NEXT: v_readfirstlane_b32 s12, v4 -; SI-NEXT: v_readfirstlane_b32 s11, v5 -; SI-NEXT: v_readfirstlane_b32 s10, v6 -; SI-NEXT: v_readfirstlane_b32 s8, v7 -; SI-NEXT: v_readfirstlane_b32 s7, v8 -; SI-NEXT: v_readfirstlane_b32 s6, v9 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v10 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -17049,6 +17265,18 @@ define inreg <48 x half> @bitcast_v24f32_to_v48f16_scalar(<24 x float> inreg %a, ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; SI-NEXT: v_readfirstlane_b32 s15, v1 +; SI-NEXT: v_readfirstlane_b32 s14, v2 +; SI-NEXT: v_readfirstlane_b32 s13, v3 +; SI-NEXT: v_readfirstlane_b32 s12, v4 +; SI-NEXT: v_readfirstlane_b32 s11, v5 +; SI-NEXT: v_readfirstlane_b32 s10, v6 +; SI-NEXT: v_readfirstlane_b32 s8, v7 +; SI-NEXT: v_readfirstlane_b32 s7, v8 +; SI-NEXT: v_readfirstlane_b32 s6, v9 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v10 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -20039,6 +20267,10 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX9-LABEL: bitcast_v48f16_to_v24f32_scalar: ; GFX9: ; %bb.0: ; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -20064,10 +20296,6 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -20191,89 +20419,165 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 
offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -21365,8 +21669,8 @@ define <48 x i16> @bitcast_v12i64_to_v48i16(<12 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v12i64_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr50 @@ -22130,10 +22434,11 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v12, s30, 0 -; SI-NEXT: v_writelane_b32 v12, s31, 1 +; SI-NEXT: v_writelane_b32 v12, s34, 0 +; SI-NEXT: v_writelane_b32 v12, s35, 1 +; SI-NEXT: v_writelane_b32 v12, s30, 2 +; SI-NEXT: v_writelane_b32 v12, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_writelane_b32 v12, s34, 2 ; SI-NEXT: v_readfirstlane_b32 s12, v1 ; SI-NEXT: v_readfirstlane_b32 s13, v2 ; SI-NEXT: v_readfirstlane_b32 s10, v3 @@ -22145,7 +22450,6 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v9 ; SI-NEXT: s_and_b64 s[14:15], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v10 -; SI-NEXT: v_writelane_b32 v12, s35, 3 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s88, s5, 16 @@ -22386,11 +22690,11 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x5c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v12, 2 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s35, v12, 3 -; SI-NEXT: v_readlane_b32 s34, v12, 2 -; SI-NEXT: v_readlane_b32 s31, 
v12, 1 -; SI-NEXT: v_readlane_b32 s30, v12, 0 +; SI-NEXT: v_readlane_b32 s31, v12, 3 +; SI-NEXT: v_readlane_b32 s35, v12, 1 +; SI-NEXT: v_readlane_b32 s34, v12, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -24592,6 +24896,10 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v48i16_to_v12i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -24617,10 +24925,6 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -24742,89 +25046,165 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -25153,16 +25533,7 @@ end: define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v12i64_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25183,6 +25554,11 @@ define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -25227,6 +25603,10 @@ define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; 
SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -26160,6 +26540,7 @@ define inreg <48 x half> @bitcast_v12i64_to_v48f16_scalar(<12 x i64> inreg %a, i ; SI-LABEL: bitcast_v12i64_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 @@ -26172,7 +26553,6 @@ define inreg <48 x half> @bitcast_v12i64_to_v48f16_scalar(<12 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v9 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v10 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -29010,6 +29390,10 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX9-LABEL: bitcast_v48f16_to_v12i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -29035,10 +29419,6 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -29162,89 +29542,165 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -29574,8 +30030,8 @@ define <48 x i16> @bitcast_v12f64_to_v48i16(<12 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v12f64_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr50 @@ -30263,6 +30719,9 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a, ; SI-LABEL: bitcast_v12f64_to_v48i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_mov_b32_e32 v23, s16 ; SI-NEXT: v_mov_b32_e32 v24, s17 @@ -30279,9 +30738,6 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v11, s28 ; SI-NEXT: v_mov_b32_e32 v12, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[25:26], v[9:10], 16 @@ -32809,6 +33265,10 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX9-LABEL: bitcast_v48i16_to_v12f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -32834,10 +33294,6 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 
4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -32959,89 +33415,165 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 
offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 
offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -33370,16 +33902,7 @@ end: define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v12f64_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -33400,6 +33923,11 @@ define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-NEXT: 
; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -33444,6 +33972,10 @@ define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -34293,18 +34825,6 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a ; SI-LABEL: bitcast_v12f64_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_readfirstlane_b32 s12, v1 -; SI-NEXT: v_readfirstlane_b32 s13, v2 -; SI-NEXT: v_readfirstlane_b32 s10, v3 -; SI-NEXT: v_readfirstlane_b32 s11, v4 -; SI-NEXT: v_readfirstlane_b32 s8, v5 -; SI-NEXT: v_readfirstlane_b32 s9, v6 -; SI-NEXT: v_readfirstlane_b32 s6, v7 -; SI-NEXT: v_readfirstlane_b32 s7, v8 -; SI-NEXT: v_readfirstlane_b32 s4, v9 -; SI-NEXT: s_and_b64 s[14:15], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s5, v10 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -34319,6 +34839,18 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; SI-NEXT: v_readfirstlane_b32 s12, v1 +; SI-NEXT: v_readfirstlane_b32 s13, v2 +; SI-NEXT: v_readfirstlane_b32 s10, v3 +; SI-NEXT: v_readfirstlane_b32 s11, v4 +; SI-NEXT: v_readfirstlane_b32 s8, v5 +; SI-NEXT: v_readfirstlane_b32 s9, v6 +; SI-NEXT: v_readfirstlane_b32 s6, v7 +; SI-NEXT: v_readfirstlane_b32 s7, v8 +; SI-NEXT: v_readfirstlane_b32 s4, v9 +; SI-NEXT: s_and_b64 s[14:15], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v10 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s14, s5, 16 @@ -37274,6 +37806,10 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX9-LABEL: bitcast_v48f16_to_v12f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -37299,10 +37835,6 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -37426,89 +37958,165 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll index 07cdbef82d892..282e7a7953de6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll @@ -2570,12 +2570,12 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v26i32_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: 
; implicit-def: $vgpr54 @@ -2866,11 +2866,11 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v26i32_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -3047,11 +3047,11 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26i32_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -3412,15 +3412,16 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v14, s30, 0 -; SI-NEXT: v_writelane_b32 v14, s31, 1 -; SI-NEXT: v_writelane_b32 v14, s34, 2 -; SI-NEXT: v_writelane_b32 v14, s35, 3 -; SI-NEXT: v_writelane_b32 v14, s36, 4 -; SI-NEXT: v_writelane_b32 v14, s37, 5 -; SI-NEXT: v_writelane_b32 v14, s38, 6 +; SI-NEXT: 
v_writelane_b32 v14, s34, 0 +; SI-NEXT: v_writelane_b32 v14, s35, 1 +; SI-NEXT: v_writelane_b32 v14, s36, 2 +; SI-NEXT: v_writelane_b32 v14, s37, 3 +; SI-NEXT: v_writelane_b32 v14, s38, 4 +; SI-NEXT: v_writelane_b32 v14, s39, 5 +; SI-NEXT: v_writelane_b32 v14, s48, 6 +; SI-NEXT: v_writelane_b32 v14, s30, 7 +; SI-NEXT: v_writelane_b32 v14, s31, 8 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_writelane_b32 v14, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 ; SI-NEXT: v_readfirstlane_b32 s12, v3 @@ -3434,7 +3435,6 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v11 ; SI-NEXT: s_and_b64 s[40:41], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v12 -; SI-NEXT: v_writelane_b32 v14, s48, 8 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s92, s5, 16 @@ -3693,16 +3693,16 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x64, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v14, 7 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s48, v14, 8 -; SI-NEXT: v_readlane_b32 s39, v14, 7 -; SI-NEXT: v_readlane_b32 s38, v14, 6 -; SI-NEXT: v_readlane_b32 s37, v14, 5 -; SI-NEXT: v_readlane_b32 s36, v14, 4 -; SI-NEXT: v_readlane_b32 s35, v14, 3 -; SI-NEXT: v_readlane_b32 s34, v14, 2 -; SI-NEXT: v_readlane_b32 s31, v14, 1 -; SI-NEXT: v_readlane_b32 s30, v14, 0 +; SI-NEXT: v_readlane_b32 s31, v14, 8 +; SI-NEXT: v_readlane_b32 s48, v14, 6 +; SI-NEXT: v_readlane_b32 s39, v14, 5 +; SI-NEXT: v_readlane_b32 s38, v14, 4 +; SI-NEXT: v_readlane_b32 s37, v14, 3 +; SI-NEXT: v_readlane_b32 s36, v14, 2 +; SI-NEXT: v_readlane_b32 s35, v14, 1 +; SI-NEXT: v_readlane_b32 s34, v14, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v14, off, s[0:3], s32 ; 4-byte 
Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -6114,6 +6114,14 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v52i16_to_v26i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -6141,14 +6149,6 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 
; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -6286,90 +6286,167 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -6716,16 +6793,7 @@ end: define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v26i32_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6746,6 +6814,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed 
$vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -6800,6 +6873,10 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -7290,11 +7367,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v26i32_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -7471,11 +7548,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26i32_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -7832,6 +7909,11 @@ define inreg <52 x half> @bitcast_v26i32_to_v52f16_scalar(<26 x i32> inreg %a, i ; SI-LABEL: bitcast_v26i32_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_readfirstlane_b32 s41, v1 ; SI-NEXT: v_readfirstlane_b32 s40, v2 @@ -7846,11 +7928,6 @@ define inreg <52 x half> @bitcast_v26i32_to_v52f16_scalar(<26 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v11 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v12 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -10938,6 +11015,14 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v26i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 
; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -10965,14 +11050,6 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -11112,90 +11189,167 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -13185,12 +13339,12 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v26f32_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -13481,11 +13635,11 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v26f32_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 
offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -13662,11 +13816,11 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26f32_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -13997,6 +14151,14 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; SI-LABEL: bitcast_v26f32_to_v52i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_mov_b32_e32 v25, s16 ; SI-NEXT: v_mov_b32_e32 v26, s17 @@ -14013,14 
+14175,6 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v13, s28 ; SI-NEXT: v_mov_b32_e32 v14, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[27:28], v[11:12], 16 @@ -14314,6 +14468,10 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; VI-LABEL: bitcast_v26f32_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v22, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -14330,10 +14488,6 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v19, s27 ; VI-NEXT: v_mov_b32_e32 v12, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte 
Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -14519,6 +14673,10 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; GFX9-LABEL: bitcast_v26f32_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v22, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -14535,10 +14693,6 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v19, s27 ; GFX9-NEXT: v_mov_b32_e32 v12, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -16849,6 +17003,14 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX9-LABEL: bitcast_v52i16_to_v26f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 
4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -16876,14 +17038,6 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -17021,90 +17175,167 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 
0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -17451,16 +17682,7 @@ end: define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v26f32_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17481,6 +17703,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -17535,6 +17762,10 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 
%b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -18025,11 +18256,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v26f32_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -18206,11 +18437,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26f32_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -18541,20 +18772,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; SI-LABEL: bitcast_v26f32_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_readfirstlane_b32 s41, v1 -; SI-NEXT: v_readfirstlane_b32 s40, v2 -; SI-NEXT: v_readfirstlane_b32 s15, v3 -; SI-NEXT: v_readfirstlane_b32 s14, v4 -; SI-NEXT: v_readfirstlane_b32 s13, v5 -; SI-NEXT: v_readfirstlane_b32 s12, v6 -; SI-NEXT: v_readfirstlane_b32 s11, v7 -; SI-NEXT: v_readfirstlane_b32 s10, v8 -; SI-NEXT: v_readfirstlane_b32 s8, v9 -; SI-NEXT: v_readfirstlane_b32 s7, v10 -; SI-NEXT: v_readfirstlane_b32 s6, v11 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v12 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -18571,6 +18788,20 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; SI-NEXT: v_readfirstlane_b32 s41, v1 +; SI-NEXT: v_readfirstlane_b32 s40, v2 +; SI-NEXT: v_readfirstlane_b32 s15, v3 +; SI-NEXT: v_readfirstlane_b32 s14, v4 +; SI-NEXT: v_readfirstlane_b32 s13, v5 +; SI-NEXT: v_readfirstlane_b32 s12, v6 +; SI-NEXT: v_readfirstlane_b32 s11, v7 +; SI-NEXT: v_readfirstlane_b32 s10, v8 +; SI-NEXT: v_readfirstlane_b32 s8, v9 +; SI-NEXT: v_readfirstlane_b32 s7, v10 +; SI-NEXT: v_readfirstlane_b32 s6, v11 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v12 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -19022,6 +19253,10 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; VI-LABEL: 
bitcast_v26f32_to_v52f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v22, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -19038,10 +19273,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v19, s27 ; VI-NEXT: v_mov_b32_e32 v12, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -19227,6 +19458,10 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; GFX9-LABEL: bitcast_v26f32_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v22, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -19243,10 +19478,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v19, s27 ; 
GFX9-NEXT: v_mov_b32_e32 v12, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -21831,6 +22062,14 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX9-LABEL: bitcast_v52f16_to_v26f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -21858,14 +22097,6 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte 
Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -22005,90 +22236,167 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, 
s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -23238,12 +23546,12 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v13i64_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 
offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -23534,11 +23842,11 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v13i64_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -23715,11 +24023,11 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13i64_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -24094,15 +24402,16 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; 
SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v14, s30, 0 -; SI-NEXT: v_writelane_b32 v14, s31, 1 -; SI-NEXT: v_writelane_b32 v14, s34, 2 -; SI-NEXT: v_writelane_b32 v14, s35, 3 -; SI-NEXT: v_writelane_b32 v14, s36, 4 -; SI-NEXT: v_writelane_b32 v14, s37, 5 -; SI-NEXT: v_writelane_b32 v14, s38, 6 +; SI-NEXT: v_writelane_b32 v14, s34, 0 +; SI-NEXT: v_writelane_b32 v14, s35, 1 +; SI-NEXT: v_writelane_b32 v14, s36, 2 +; SI-NEXT: v_writelane_b32 v14, s37, 3 +; SI-NEXT: v_writelane_b32 v14, s38, 4 +; SI-NEXT: v_writelane_b32 v14, s39, 5 +; SI-NEXT: v_writelane_b32 v14, s48, 6 +; SI-NEXT: v_writelane_b32 v14, s30, 7 +; SI-NEXT: v_writelane_b32 v14, s31, 8 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_writelane_b32 v14, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 ; SI-NEXT: v_readfirstlane_b32 s12, v3 @@ -24116,7 +24425,6 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v11 ; SI-NEXT: s_and_b64 s[40:41], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v12 -; SI-NEXT: v_writelane_b32 v14, s48, 8 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s92, s5, 16 @@ -24375,16 +24683,16 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x64, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v14, 7 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s48, v14, 8 -; SI-NEXT: v_readlane_b32 s39, v14, 7 -; SI-NEXT: v_readlane_b32 s38, v14, 6 -; SI-NEXT: v_readlane_b32 s37, v14, 5 -; SI-NEXT: v_readlane_b32 s36, v14, 4 -; SI-NEXT: v_readlane_b32 s35, v14, 3 -; SI-NEXT: v_readlane_b32 s34, v14, 2 -; SI-NEXT: v_readlane_b32 s31, v14, 1 -; SI-NEXT: v_readlane_b32 s30, v14, 0 +; SI-NEXT: v_readlane_b32 s31, v14, 8 +; SI-NEXT: v_readlane_b32 s48, v14, 6 +; 
SI-NEXT: v_readlane_b32 s39, v14, 5 +; SI-NEXT: v_readlane_b32 s38, v14, 4 +; SI-NEXT: v_readlane_b32 s37, v14, 3 +; SI-NEXT: v_readlane_b32 s36, v14, 2 +; SI-NEXT: v_readlane_b32 s35, v14, 1 +; SI-NEXT: v_readlane_b32 s34, v14, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v14, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -26796,6 +27104,14 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v52i16_to_v13i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -26823,14 +27139,6 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, 
s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -26968,90 +27276,167 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 
offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 
offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -27398,16 +27783,7 @@ end: define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v13i64_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], 
s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -27428,6 +27804,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -27482,6 +27863,10 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -27973,11 +28358,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v13i64_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -28154,11 +28539,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13i64_to_v52f16: ; 
GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -28529,6 +28914,11 @@ define inreg <52 x half> @bitcast_v13i64_to_v52f16_scalar(<13 x i64> inreg %a, i ; SI-LABEL: bitcast_v13i64_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 @@ -28543,11 +28933,6 @@ define inreg <52 x half> @bitcast_v13i64_to_v52f16_scalar(<13 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v11 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v12 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: 
s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -31635,6 +32020,14 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v13i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -31662,14 +32055,6 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded 
Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -31809,90 +32194,167 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 
offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 
offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -32240,12 +32702,12 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v13f64_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -32523,11 +32985,11 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> 
%a, i32 %b) { ; VI-LABEL: bitcast_v13f64_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -32691,11 +33153,11 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13f64_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -33013,6 +33475,14 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; SI-LABEL: bitcast_v13f64_to_v52i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte 
Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_mov_b32_e32 v25, s16 ; SI-NEXT: v_mov_b32_e32 v26, s17 @@ -33029,14 +33499,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v13, s28 ; SI-NEXT: v_mov_b32_e32 v14, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[27:28], v[11:12], 16 @@ -33317,6 +33779,10 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; VI-LABEL: bitcast_v13f64_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v21, s16 ; VI-NEXT: 
v_mov_b32_e32 v22, s17 @@ -33333,10 +33799,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v15, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -33509,6 +33971,10 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; GFX9-LABEL: bitcast_v13f64_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v21, s16 ; GFX9-NEXT: v_mov_b32_e32 v22, s17 @@ -33525,10 +33991,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v15, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: 
v_lshrrev_b32_e32 v25, 16, v11 @@ -35826,6 +36288,14 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX9-LABEL: bitcast_v52i16_to_v13f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -35853,14 +36323,6 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -35998,90 +36460,167 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -36428,16 +36967,7 @@ end: define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v13f64_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -36458,6 +36988,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; 
kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -36512,6 +37047,10 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -36976,11 +37515,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v13f64_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -37144,11 +37683,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13f64_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 
offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -37466,20 +38005,6 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; SI-LABEL: bitcast_v13f64_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_readfirstlane_b32 s14, v1 -; SI-NEXT: v_readfirstlane_b32 s15, v2 -; SI-NEXT: v_readfirstlane_b32 s12, v3 -; SI-NEXT: v_readfirstlane_b32 s13, v4 -; SI-NEXT: v_readfirstlane_b32 s10, v5 -; SI-NEXT: v_readfirstlane_b32 s11, v6 -; SI-NEXT: v_readfirstlane_b32 s8, v7 -; SI-NEXT: v_readfirstlane_b32 s9, v8 -; SI-NEXT: v_readfirstlane_b32 s6, v9 -; SI-NEXT: v_readfirstlane_b32 s7, v10 -; SI-NEXT: v_readfirstlane_b32 s4, v11 -; SI-NEXT: s_and_b64 s[40:41], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s5, v12 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37496,6 +38021,20 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; SI-NEXT: v_readfirstlane_b32 s14, v1 +; SI-NEXT: v_readfirstlane_b32 s15, v2 +; SI-NEXT: v_readfirstlane_b32 s12, v3 +; SI-NEXT: v_readfirstlane_b32 s13, v4 +; SI-NEXT: v_readfirstlane_b32 s10, v5 +; SI-NEXT: v_readfirstlane_b32 s11, v6 +; SI-NEXT: v_readfirstlane_b32 s8, v7 +; SI-NEXT: v_readfirstlane_b32 s9, v8 +; 
SI-NEXT: v_readfirstlane_b32 s6, v9 +; SI-NEXT: v_readfirstlane_b32 s7, v10 +; SI-NEXT: v_readfirstlane_b32 s4, v11 +; SI-NEXT: s_and_b64 s[40:41], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v12 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s40, s5, 16 @@ -37934,6 +38473,10 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; VI-LABEL: bitcast_v13f64_to_v52f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v21, s16 ; VI-NEXT: v_mov_b32_e32 v22, s17 @@ -37950,10 +38493,6 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v15, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -38126,6 +38665,10 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; GFX9-LABEL: bitcast_v13f64_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 
+; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v21, s16 ; GFX9-NEXT: v_mov_b32_e32 v22, s17 @@ -38142,10 +38685,6 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v15, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -40717,6 +41256,14 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX9-LABEL: bitcast_v52f16_to_v13f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -40744,14 +41291,6 @@ define inreg 
<13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -40891,90 +41430,167 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -43549,6 +44165,10 @@ 
define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v52i16_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -43577,10 +44197,6 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -45783,6 +46399,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; VI-LABEL: bitcast_v52f16_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -45811,10 
+46431,6 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -45979,6 +46595,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -46007,10 +46627,6 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz 
.LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll index 8eb71e90f8504..f6ff5be918706 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll @@ -2719,7 +2719,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v28i32_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -2729,6 +2728,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -3045,7 +3045,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v28i32_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -3054,6 +3053,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -3246,7 +3246,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28i32_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -3255,6 +3254,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -3641,20 +3641,21 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v16, s30, 0 -; SI-NEXT: v_writelane_b32 v16, s31, 1 -; SI-NEXT: v_writelane_b32 v16, s34, 2 -; SI-NEXT: v_writelane_b32 v16, s35, 3 -; SI-NEXT: v_writelane_b32 v16, s36, 4 -; SI-NEXT: v_writelane_b32 v16, s37, 5 -; SI-NEXT: v_writelane_b32 v16, s38, 6 -; SI-NEXT: v_writelane_b32 v16, s39, 7 -; SI-NEXT: v_writelane_b32 v16, s48, 8 -; SI-NEXT: v_writelane_b32 v16, s49, 9 -; SI-NEXT: v_writelane_b32 v16, s50, 10 -; SI-NEXT: v_writelane_b32 v16, s51, 11 +; SI-NEXT: v_writelane_b32 v16, s34, 0 +; SI-NEXT: v_writelane_b32 
v16, s35, 1 +; SI-NEXT: v_writelane_b32 v16, s36, 2 +; SI-NEXT: v_writelane_b32 v16, s37, 3 +; SI-NEXT: v_writelane_b32 v16, s38, 4 +; SI-NEXT: v_writelane_b32 v16, s39, 5 +; SI-NEXT: v_writelane_b32 v16, s48, 6 +; SI-NEXT: v_writelane_b32 v16, s49, 7 +; SI-NEXT: v_writelane_b32 v16, s50, 8 +; SI-NEXT: v_writelane_b32 v16, s51, 9 +; SI-NEXT: v_writelane_b32 v16, s52, 10 +; SI-NEXT: v_writelane_b32 v16, s53, 11 +; SI-NEXT: v_writelane_b32 v16, s30, 12 +; SI-NEXT: v_writelane_b32 v16, s31, 13 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_writelane_b32 v16, s52, 12 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 ; SI-NEXT: v_readfirstlane_b32 s14, v3 @@ -3670,7 +3671,6 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v13 ; SI-NEXT: s_and_b64 s[42:43], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v14 -; SI-NEXT: v_writelane_b32 v16, s53, 13 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s30, s5, 16 @@ -3950,21 +3950,21 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x6c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v16, 12 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s53, v16, 13 -; SI-NEXT: v_readlane_b32 s52, v16, 12 -; SI-NEXT: v_readlane_b32 s51, v16, 11 -; SI-NEXT: v_readlane_b32 s50, v16, 10 -; SI-NEXT: v_readlane_b32 s49, v16, 9 -; SI-NEXT: v_readlane_b32 s48, v16, 8 -; SI-NEXT: v_readlane_b32 s39, v16, 7 -; SI-NEXT: v_readlane_b32 s38, v16, 6 -; SI-NEXT: v_readlane_b32 s37, v16, 5 -; SI-NEXT: v_readlane_b32 s36, v16, 4 -; SI-NEXT: v_readlane_b32 s35, v16, 3 -; SI-NEXT: v_readlane_b32 s34, v16, 2 -; SI-NEXT: v_readlane_b32 s31, v16, 1 -; SI-NEXT: v_readlane_b32 s30, v16, 0 +; SI-NEXT: v_readlane_b32 s31, v16, 13 +; SI-NEXT: 
v_readlane_b32 s53, v16, 11 +; SI-NEXT: v_readlane_b32 s52, v16, 10 +; SI-NEXT: v_readlane_b32 s51, v16, 9 +; SI-NEXT: v_readlane_b32 s50, v16, 8 +; SI-NEXT: v_readlane_b32 s49, v16, 7 +; SI-NEXT: v_readlane_b32 s48, v16, 6 +; SI-NEXT: v_readlane_b32 s39, v16, 5 +; SI-NEXT: v_readlane_b32 s38, v16, 4 +; SI-NEXT: v_readlane_b32 s37, v16, 3 +; SI-NEXT: v_readlane_b32 s36, v16, 2 +; SI-NEXT: v_readlane_b32 s35, v16, 1 +; SI-NEXT: v_readlane_b32 s34, v16, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -4007,10 +4007,11 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -4026,7 +4027,6 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -4200,6 +4200,7 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, 
s16 @@ -4228,10 +4229,9 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -6585,6 +6585,18 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v56i16_to_v28i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ 
-6614,18 +6626,6 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -6779,90 +6779,167 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 
offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -7224,6 +7301,22 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v28i32_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: 
killed $vgpr29 @@ -7266,22 +7359,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -7867,7 +7944,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v28i32_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -7876,6 +7952,7 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -8068,7 +8145,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28i32_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -8077,6 +8153,7 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -8459,6 +8536,15 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; SI-LABEL: bitcast_v28i32_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 
4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_readfirstlane_b32 s43, v1 ; SI-NEXT: v_readfirstlane_b32 s42, v2 @@ -8475,15 +8561,6 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v13 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v14 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -8964,10 +9041,11 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 
4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -8983,7 +9061,6 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB17_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -9157,6 +9234,7 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -9185,10 +9263,9 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -11847,6 +11924,18 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v28i32_scalar: ; GFX9: ; %bb.0: ; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -11876,18 +11965,6 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 
4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -12043,90 +12120,167 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -14225,7 +14379,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v28f32_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -14235,6 +14388,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -14551,7 +14705,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v28f32_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -14560,6 +14713,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -14752,7 +14906,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28f32_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], 
s32 offset:20 ; 4-byte Folded Spill @@ -14761,6 +14914,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -15115,6 +15269,18 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; SI-LABEL: bitcast_v28f32_to_v56i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -15131,18 +15297,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], 
vcc, exec ; SI-NEXT: v_mov_b32_e32 v15, s28 ; SI-NEXT: v_mov_b32_e32 v16, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[29:30], v[13:14], 16 @@ -15460,6 +15614,14 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; VI-LABEL: bitcast_v28f32_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v20, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -15476,14 +15638,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v14, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -15687,6 +15841,14 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; GFX9-LABEL: bitcast_v28f32_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v20, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -15703,14 +15865,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v14, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -18210,6 +18364,18 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX9-LABEL: bitcast_v56i16_to_v28f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, 
off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -18239,18 +18405,6 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -18404,90 +18558,167 @@ define inreg 
<28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -18849,6 +19080,22 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v28f32_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 
+; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -18891,22 +19138,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; 
SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -19492,7 +19723,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v28f32_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -19501,6 +19731,7 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -19693,7 +19924,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28f32_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -19702,6 +19932,7 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -20056,22 +20287,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; SI-LABEL: bitcast_v28f32_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_readfirstlane_b32 s43, v1 -; SI-NEXT: v_readfirstlane_b32 s42, v2 -; SI-NEXT: v_readfirstlane_b32 s41, v3 -; SI-NEXT: v_readfirstlane_b32 s40, v4 -; SI-NEXT: v_readfirstlane_b32 s15, v5 -; SI-NEXT: v_readfirstlane_b32 s14, v6 -; SI-NEXT: v_readfirstlane_b32 s13, v7 -; SI-NEXT: v_readfirstlane_b32 s12, v8 -; SI-NEXT: v_readfirstlane_b32 s11, v9 -; SI-NEXT: v_readfirstlane_b32 s10, v10 -; SI-NEXT: v_readfirstlane_b32 s8, v11 -; SI-NEXT: v_readfirstlane_b32 s7, v12 -; SI-NEXT: v_readfirstlane_b32 s6, v13 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v14 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -20088,6 +20303,22 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; SI-NEXT: v_readfirstlane_b32 s43, v1 +; SI-NEXT: v_readfirstlane_b32 s42, v2 +; SI-NEXT: v_readfirstlane_b32 s41, v3 +; SI-NEXT: v_readfirstlane_b32 s40, v4 +; SI-NEXT: v_readfirstlane_b32 s15, v5 +; SI-NEXT: v_readfirstlane_b32 s14, v6 +; SI-NEXT: 
v_readfirstlane_b32 s13, v7 +; SI-NEXT: v_readfirstlane_b32 s12, v8 +; SI-NEXT: v_readfirstlane_b32 s11, v9 +; SI-NEXT: v_readfirstlane_b32 s10, v10 +; SI-NEXT: v_readfirstlane_b32 s8, v11 +; SI-NEXT: v_readfirstlane_b32 s7, v12 +; SI-NEXT: v_readfirstlane_b32 s6, v13 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v14 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -20575,6 +20806,14 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; VI-LABEL: bitcast_v28f32_to_v56f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v20, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -20591,14 +20830,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v14, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; 
VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -20802,6 +21033,14 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; GFX9-LABEL: bitcast_v28f32_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v20, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -20818,14 +21057,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v14, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 
4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -23630,6 +23861,18 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX9-LABEL: bitcast_v56f16_to_v28f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -23659,18 +23902,6 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: 
s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -23826,90 +24057,167 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, 
v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -25120,7 +25428,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v14i64_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -25130,6 +25437,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -25446,7 +25754,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v14i64_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -25455,6 +25762,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte 
Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -25647,7 +25955,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14i64_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -25656,6 +25963,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -26056,20 +26364,21 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v16, s30, 0 -; SI-NEXT: v_writelane_b32 v16, s31, 1 -; SI-NEXT: v_writelane_b32 v16, s34, 2 -; SI-NEXT: v_writelane_b32 v16, s35, 3 -; SI-NEXT: v_writelane_b32 v16, s36, 4 -; SI-NEXT: v_writelane_b32 v16, s37, 5 -; SI-NEXT: v_writelane_b32 v16, s38, 6 -; SI-NEXT: v_writelane_b32 v16, s39, 7 -; SI-NEXT: v_writelane_b32 v16, s48, 8 -; SI-NEXT: v_writelane_b32 v16, s49, 9 -; SI-NEXT: v_writelane_b32 v16, s50, 10 -; SI-NEXT: v_writelane_b32 v16, s51, 11 +; SI-NEXT: v_writelane_b32 v16, s34, 0 +; SI-NEXT: v_writelane_b32 v16, s35, 1 +; SI-NEXT: v_writelane_b32 v16, 
s36, 2 +; SI-NEXT: v_writelane_b32 v16, s37, 3 +; SI-NEXT: v_writelane_b32 v16, s38, 4 +; SI-NEXT: v_writelane_b32 v16, s39, 5 +; SI-NEXT: v_writelane_b32 v16, s48, 6 +; SI-NEXT: v_writelane_b32 v16, s49, 7 +; SI-NEXT: v_writelane_b32 v16, s50, 8 +; SI-NEXT: v_writelane_b32 v16, s51, 9 +; SI-NEXT: v_writelane_b32 v16, s52, 10 +; SI-NEXT: v_writelane_b32 v16, s53, 11 +; SI-NEXT: v_writelane_b32 v16, s30, 12 +; SI-NEXT: v_writelane_b32 v16, s31, 13 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_writelane_b32 v16, s52, 12 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 ; SI-NEXT: v_readfirstlane_b32 s14, v3 @@ -26085,7 +26394,6 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v13 ; SI-NEXT: s_and_b64 s[42:43], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v14 -; SI-NEXT: v_writelane_b32 v16, s53, 13 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s30, s5, 16 @@ -26365,21 +26673,21 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x6c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v16, 12 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s53, v16, 13 -; SI-NEXT: v_readlane_b32 s52, v16, 12 -; SI-NEXT: v_readlane_b32 s51, v16, 11 -; SI-NEXT: v_readlane_b32 s50, v16, 10 -; SI-NEXT: v_readlane_b32 s49, v16, 9 -; SI-NEXT: v_readlane_b32 s48, v16, 8 -; SI-NEXT: v_readlane_b32 s39, v16, 7 -; SI-NEXT: v_readlane_b32 s38, v16, 6 -; SI-NEXT: v_readlane_b32 s37, v16, 5 -; SI-NEXT: v_readlane_b32 s36, v16, 4 -; SI-NEXT: v_readlane_b32 s35, v16, 3 -; SI-NEXT: v_readlane_b32 s34, v16, 2 -; SI-NEXT: v_readlane_b32 s31, v16, 1 -; SI-NEXT: v_readlane_b32 s30, v16, 0 +; SI-NEXT: v_readlane_b32 s31, v16, 13 +; SI-NEXT: v_readlane_b32 s53, v16, 11 +; SI-NEXT: 
v_readlane_b32 s52, v16, 10 +; SI-NEXT: v_readlane_b32 s51, v16, 9 +; SI-NEXT: v_readlane_b32 s50, v16, 8 +; SI-NEXT: v_readlane_b32 s49, v16, 7 +; SI-NEXT: v_readlane_b32 s48, v16, 6 +; SI-NEXT: v_readlane_b32 s39, v16, 5 +; SI-NEXT: v_readlane_b32 s38, v16, 4 +; SI-NEXT: v_readlane_b32 s37, v16, 3 +; SI-NEXT: v_readlane_b32 s36, v16, 2 +; SI-NEXT: v_readlane_b32 s35, v16, 1 +; SI-NEXT: v_readlane_b32 s34, v16, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -26422,10 +26730,11 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -26441,7 +26750,6 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB41_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -26615,6 +26923,7 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -26643,10 +26952,9 @@ define 
inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -29000,6 +29308,18 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v56i16_to_v14i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -29029,18 +29349,6 @@ define inreg <14 
x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -29194,90 +29502,167 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -29639,6 +30024,22 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v14i64_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -29681,22 +30082,6 @@ 
define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -30282,7 +30667,6 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v14i64_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], 
s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -30291,6 +30675,7 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -30483,7 +30868,6 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14i64_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -30492,6 +30876,7 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -30888,6 +31273,15 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; SI-LABEL: bitcast_v14i64_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 @@ -30904,15 +31298,6 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v13 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v14 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -31393,10 +31778,11 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: 
s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -31412,7 +31798,6 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB45_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -31586,6 +31971,7 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -31614,10 +32000,9 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -34276,6 +34661,18 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v14i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -34305,18 +34702,6 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -34472,90 +34857,167 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 
offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 
offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -34917,7 +35379,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v14f64_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 
4-byte Folded Spill @@ -34927,6 +35388,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -35229,7 +35691,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v14f64_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -35238,6 +35699,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -35416,7 +35878,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14f64_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -35425,6 
+35886,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -35765,6 +36227,18 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; SI-LABEL: bitcast_v14f64_to_v56i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -35781,18 +36255,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v15, s28 ; 
SI-NEXT: v_mov_b32_e32 v16, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[29:30], v[13:14], 16 @@ -36096,6 +36558,14 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; VI-LABEL: bitcast_v14f64_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, 
off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -36112,14 +36582,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v17, s28 ; VI-NEXT: v_mov_b32_e32 v18, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -36309,6 +36771,14 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; GFX9-LABEL: bitcast_v14f64_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -36325,14 +36795,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v17, s28 ; GFX9-NEXT: v_mov_b32_e32 v18, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -38818,6 +39280,18 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX9-LABEL: bitcast_v56i16_to_v14f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 
offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -38847,18 +39321,6 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -39012,90 +39474,167 @@ define inreg <14 x double> 
@bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -39457,6 +39996,22 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v14f64_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 
+; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -39499,22 +40054,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 
-; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr55 ; SI-NEXT: ; implicit-def: $vgpr40 @@ -40071,7 +40610,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v14f64_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -40080,6 +40618,7 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -40258,7 +40797,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14f64_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -40267,6 +40805,7 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 
4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -40607,22 +41146,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; SI-LABEL: bitcast_v14f64_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_readfirstlane_b32 s40, v1 -; SI-NEXT: v_readfirstlane_b32 s41, v2 -; SI-NEXT: v_readfirstlane_b32 s14, v3 -; SI-NEXT: v_readfirstlane_b32 s15, v4 -; SI-NEXT: v_readfirstlane_b32 s12, v5 -; SI-NEXT: v_readfirstlane_b32 s13, v6 -; SI-NEXT: v_readfirstlane_b32 s10, v7 -; SI-NEXT: v_readfirstlane_b32 s11, v8 -; SI-NEXT: v_readfirstlane_b32 s8, v9 -; SI-NEXT: v_readfirstlane_b32 s9, v10 -; SI-NEXT: v_readfirstlane_b32 s6, v11 -; SI-NEXT: v_readfirstlane_b32 s7, v12 -; SI-NEXT: v_readfirstlane_b32 s4, v13 -; SI-NEXT: s_and_b64 s[42:43], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s5, v14 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -40639,6 +41162,22 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; SI-NEXT: v_readfirstlane_b32 s40, v1 +; SI-NEXT: v_readfirstlane_b32 s41, v2 +; SI-NEXT: v_readfirstlane_b32 s14, v3 +; SI-NEXT: v_readfirstlane_b32 s15, v4 +; SI-NEXT: v_readfirstlane_b32 s12, v5 +; SI-NEXT: v_readfirstlane_b32 s13, v6 +; SI-NEXT: 
v_readfirstlane_b32 s10, v7 +; SI-NEXT: v_readfirstlane_b32 s11, v8 +; SI-NEXT: v_readfirstlane_b32 s8, v9 +; SI-NEXT: v_readfirstlane_b32 s9, v10 +; SI-NEXT: v_readfirstlane_b32 s6, v11 +; SI-NEXT: v_readfirstlane_b32 s7, v12 +; SI-NEXT: v_readfirstlane_b32 s4, v13 +; SI-NEXT: s_and_b64 s[42:43], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v14 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s42, s5, 16 @@ -41120,6 +41659,14 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; VI-LABEL: bitcast_v14f64_to_v56f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -41136,14 +41683,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v17, s28 ; VI-NEXT: v_mov_b32_e32 v18, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; 
VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -41333,6 +41872,14 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; GFX9-LABEL: bitcast_v14f64_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -41349,14 +41896,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v17, s28 ; GFX9-NEXT: v_mov_b32_e32 v18, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 
offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -44147,6 +44686,18 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX9-LABEL: bitcast_v56f16_to_v14f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -44176,18 +44727,6 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; 
GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -44343,90 +44882,167 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, 
v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -47265,6 +47881,14 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v56i16_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -47295,14 +47919,6 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -49736,6 +50352,14 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; VI-LABEL: bitcast_v56f16_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -49766,14 +50390,6 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -49952,6 +50568,14 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -49982,14 +50606,6 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword 
v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll index 93c11f13ce3ce..134980045bb53 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll @@ -2849,7 +2849,6 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v30i32_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -2863,6 +2862,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -2892,7 +2892,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr37 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -3205,7 +3205,6 @@ define 
<60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v30i32_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -3218,6 +3217,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -3426,7 +3426,6 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30i32_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -3439,6 +3438,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -3851,23 +3851,24 @@ define inreg <60 x i16> 
@bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v18, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v18, s30, 0 -; SI-NEXT: v_writelane_b32 v18, s31, 1 -; SI-NEXT: v_writelane_b32 v18, s34, 2 -; SI-NEXT: v_writelane_b32 v18, s35, 3 -; SI-NEXT: v_writelane_b32 v18, s36, 4 -; SI-NEXT: v_writelane_b32 v18, s37, 5 -; SI-NEXT: v_writelane_b32 v18, s38, 6 -; SI-NEXT: v_writelane_b32 v18, s39, 7 -; SI-NEXT: v_writelane_b32 v18, s48, 8 -; SI-NEXT: v_writelane_b32 v18, s49, 9 -; SI-NEXT: v_writelane_b32 v18, s50, 10 -; SI-NEXT: v_writelane_b32 v18, s51, 11 -; SI-NEXT: v_writelane_b32 v18, s52, 12 -; SI-NEXT: v_writelane_b32 v18, s53, 13 -; SI-NEXT: v_writelane_b32 v18, s54, 14 +; SI-NEXT: v_writelane_b32 v18, s34, 0 +; SI-NEXT: v_writelane_b32 v18, s35, 1 +; SI-NEXT: v_writelane_b32 v18, s36, 2 +; SI-NEXT: v_writelane_b32 v18, s37, 3 +; SI-NEXT: v_writelane_b32 v18, s38, 4 +; SI-NEXT: v_writelane_b32 v18, s39, 5 +; SI-NEXT: v_writelane_b32 v18, s48, 6 +; SI-NEXT: v_writelane_b32 v18, s49, 7 +; SI-NEXT: v_writelane_b32 v18, s50, 8 +; SI-NEXT: v_writelane_b32 v18, s51, 9 +; SI-NEXT: v_writelane_b32 v18, s52, 10 +; SI-NEXT: v_writelane_b32 v18, s53, 11 +; SI-NEXT: v_writelane_b32 v18, s54, 12 +; SI-NEXT: v_writelane_b32 v18, s55, 13 +; SI-NEXT: v_writelane_b32 v18, s64, 14 +; SI-NEXT: v_writelane_b32 v18, s30, 15 +; SI-NEXT: v_writelane_b32 v18, s31, 16 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_writelane_b32 v18, s55, 15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 ; SI-NEXT: v_readfirstlane_b32 s40, v3 @@ -3885,7 +3886,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: v_writelane_b32 v18, s64, 16 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; 
%bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s34, s5, 16 @@ -4183,24 +4183,24 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x74, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v18, 15 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s64, v18, 16 -; SI-NEXT: v_readlane_b32 s55, v18, 15 -; SI-NEXT: v_readlane_b32 s54, v18, 14 -; SI-NEXT: v_readlane_b32 s53, v18, 13 -; SI-NEXT: v_readlane_b32 s52, v18, 12 -; SI-NEXT: v_readlane_b32 s51, v18, 11 -; SI-NEXT: v_readlane_b32 s50, v18, 10 -; SI-NEXT: v_readlane_b32 s49, v18, 9 -; SI-NEXT: v_readlane_b32 s48, v18, 8 -; SI-NEXT: v_readlane_b32 s39, v18, 7 -; SI-NEXT: v_readlane_b32 s38, v18, 6 -; SI-NEXT: v_readlane_b32 s37, v18, 5 -; SI-NEXT: v_readlane_b32 s36, v18, 4 -; SI-NEXT: v_readlane_b32 s35, v18, 3 -; SI-NEXT: v_readlane_b32 s34, v18, 2 -; SI-NEXT: v_readlane_b32 s31, v18, 1 -; SI-NEXT: v_readlane_b32 s30, v18, 0 +; SI-NEXT: v_readlane_b32 s31, v18, 16 +; SI-NEXT: v_readlane_b32 s64, v18, 14 +; SI-NEXT: v_readlane_b32 s55, v18, 13 +; SI-NEXT: v_readlane_b32 s54, v18, 12 +; SI-NEXT: v_readlane_b32 s53, v18, 11 +; SI-NEXT: v_readlane_b32 s52, v18, 10 +; SI-NEXT: v_readlane_b32 s51, v18, 9 +; SI-NEXT: v_readlane_b32 s50, v18, 8 +; SI-NEXT: v_readlane_b32 s49, v18, 7 +; SI-NEXT: v_readlane_b32 s48, v18, 6 +; SI-NEXT: v_readlane_b32 s39, v18, 5 +; SI-NEXT: v_readlane_b32 s38, v18, 4 +; SI-NEXT: v_readlane_b32 s37, v18, 3 +; SI-NEXT: v_readlane_b32 s36, v18, 2 +; SI-NEXT: v_readlane_b32 s35, v18, 1 +; SI-NEXT: v_readlane_b32 s34, v18, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v18, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -4245,14 +4245,15 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: 
buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -4270,7 +4271,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -4456,6 +4456,7 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -4486,14 +4487,13 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; 
VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -4538,10 +4538,11 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -4559,7 +4560,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB13_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -4685,6 +4685,7 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -4715,10 +4716,9 @@ 
define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -7024,6 +7024,22 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v60i16_to_v30i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -7055,22 +7071,6 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -7240,90 +7240,167 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -7701,6 +7778,22 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v30i32_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 
offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -7735,22 +7828,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, 
off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -7784,7 +7861,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -8414,7 +8491,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v30i32_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -8427,6 +8503,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -8635,7 +8712,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30i32_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -8648,6 +8724,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -9056,6 +9133,19 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; SI-LABEL: bitcast_v30i32_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s45, v1 ; SI-NEXT: v_readfirstlane_b32 s44, v2 @@ -9074,19 +9164,6 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -9603,14 +9680,15 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 
v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -9628,7 +9706,6 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB17_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -9814,6 +9891,7 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -9844,14 +9922,13 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: 
v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -9896,10 +9973,11 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -9917,7 +9995,6 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB17_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -10043,6 +10120,7 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -10073,10 +10151,9 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; 
GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -12774,6 +12851,22 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v30i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded 
Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -12805,22 +12898,6 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -12992,90 +13069,167 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: 
bitcast_v60f16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -15272,7 +15426,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v30f32_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -15286,6 +15439,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -15315,7 +15469,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; 
implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr37 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -15628,7 +15782,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v30f32_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -15641,6 +15794,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -15849,7 +16003,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30f32_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -15862,6 +16015,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -16240,6 +16394,21 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; SI-LABEL: bitcast_v30f32_to_v60i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -16256,21 +16425,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v17, s28 ; SI-NEXT: 
v_mov_b32_e32 v18, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[31:32], v[15:16], 16 @@ -16611,6 +16765,18 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; VI-LABEL: bitcast_v30f32_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, 
s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -16627,18 +16793,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v20, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; 
%bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -16860,6 +17014,18 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; GFX9-LABEL: bitcast_v30f32_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -16876,18 +17042,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v20, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -19562,6 +19716,22 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX9-LABEL: bitcast_v60i16_to_v30f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -19593,22 +19763,6 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], 
s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -19778,90 +19932,167 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 
offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 
offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -20239,6 +20470,22 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v30f32_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 
4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -20273,22 +20520,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, 
s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -20322,7 +20553,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -20952,7 +21183,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v30f32_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -20965,6 +21195,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -21173,7 +21404,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x 
float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30f32_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -21186,6 +21416,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -21564,6 +21795,22 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; SI-LABEL: bitcast_v30f32_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; 
SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s45, v1 ; SI-NEXT: v_readfirstlane_b32 s44, v2 @@ -21582,22 +21829,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 
offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -22130,6 +22361,18 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; VI-LABEL: bitcast_v30f32_to_v60f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -22146,18 +22389,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v20, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -22379,6 +22610,18 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; GFX9-LABEL: bitcast_v30f32_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, 
off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -22395,18 +22638,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v20, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -25473,6 +25704,22 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX9-LABEL: bitcast_v60f16_to_v30f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; 
GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -25504,22 +25751,6 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, 
off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -25691,90 +25922,167 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -27041,7 +27349,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v15i64_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -27055,6 +27362,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -27084,7 +27392,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -27397,7 +27705,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v15i64_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ 
-27410,6 +27717,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -27618,7 +27926,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15i64_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -27631,6 +27938,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -28059,23 +28367,24 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v18, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v18, s30, 0 -; SI-NEXT: v_writelane_b32 v18, s31, 1 -; SI-NEXT: v_writelane_b32 v18, s34, 2 -; SI-NEXT: v_writelane_b32 v18, s35, 3 -; SI-NEXT: v_writelane_b32 v18, s36, 4 -; SI-NEXT: v_writelane_b32 v18, s37, 5 -; SI-NEXT: v_writelane_b32 v18, s38, 6 
-; SI-NEXT: v_writelane_b32 v18, s39, 7 -; SI-NEXT: v_writelane_b32 v18, s48, 8 -; SI-NEXT: v_writelane_b32 v18, s49, 9 -; SI-NEXT: v_writelane_b32 v18, s50, 10 -; SI-NEXT: v_writelane_b32 v18, s51, 11 -; SI-NEXT: v_writelane_b32 v18, s52, 12 -; SI-NEXT: v_writelane_b32 v18, s53, 13 -; SI-NEXT: v_writelane_b32 v18, s54, 14 +; SI-NEXT: v_writelane_b32 v18, s34, 0 +; SI-NEXT: v_writelane_b32 v18, s35, 1 +; SI-NEXT: v_writelane_b32 v18, s36, 2 +; SI-NEXT: v_writelane_b32 v18, s37, 3 +; SI-NEXT: v_writelane_b32 v18, s38, 4 +; SI-NEXT: v_writelane_b32 v18, s39, 5 +; SI-NEXT: v_writelane_b32 v18, s48, 6 +; SI-NEXT: v_writelane_b32 v18, s49, 7 +; SI-NEXT: v_writelane_b32 v18, s50, 8 +; SI-NEXT: v_writelane_b32 v18, s51, 9 +; SI-NEXT: v_writelane_b32 v18, s52, 10 +; SI-NEXT: v_writelane_b32 v18, s53, 11 +; SI-NEXT: v_writelane_b32 v18, s54, 12 +; SI-NEXT: v_writelane_b32 v18, s55, 13 +; SI-NEXT: v_writelane_b32 v18, s64, 14 +; SI-NEXT: v_writelane_b32 v18, s30, 15 +; SI-NEXT: v_writelane_b32 v18, s31, 16 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_writelane_b32 v18, s55, 15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 ; SI-NEXT: v_readfirstlane_b32 s40, v3 @@ -28093,7 +28402,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: v_writelane_b32 v18, s64, 16 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s34, s5, 16 @@ -28391,24 +28699,24 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x74, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v18, 15 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s64, v18, 16 -; SI-NEXT: v_readlane_b32 s55, v18, 15 -; SI-NEXT: 
v_readlane_b32 s54, v18, 14 -; SI-NEXT: v_readlane_b32 s53, v18, 13 -; SI-NEXT: v_readlane_b32 s52, v18, 12 -; SI-NEXT: v_readlane_b32 s51, v18, 11 -; SI-NEXT: v_readlane_b32 s50, v18, 10 -; SI-NEXT: v_readlane_b32 s49, v18, 9 -; SI-NEXT: v_readlane_b32 s48, v18, 8 -; SI-NEXT: v_readlane_b32 s39, v18, 7 -; SI-NEXT: v_readlane_b32 s38, v18, 6 -; SI-NEXT: v_readlane_b32 s37, v18, 5 -; SI-NEXT: v_readlane_b32 s36, v18, 4 -; SI-NEXT: v_readlane_b32 s35, v18, 3 -; SI-NEXT: v_readlane_b32 s34, v18, 2 -; SI-NEXT: v_readlane_b32 s31, v18, 1 -; SI-NEXT: v_readlane_b32 s30, v18, 0 +; SI-NEXT: v_readlane_b32 s31, v18, 16 +; SI-NEXT: v_readlane_b32 s64, v18, 14 +; SI-NEXT: v_readlane_b32 s55, v18, 13 +; SI-NEXT: v_readlane_b32 s54, v18, 12 +; SI-NEXT: v_readlane_b32 s53, v18, 11 +; SI-NEXT: v_readlane_b32 s52, v18, 10 +; SI-NEXT: v_readlane_b32 s51, v18, 9 +; SI-NEXT: v_readlane_b32 s50, v18, 8 +; SI-NEXT: v_readlane_b32 s49, v18, 7 +; SI-NEXT: v_readlane_b32 s48, v18, 6 +; SI-NEXT: v_readlane_b32 s39, v18, 5 +; SI-NEXT: v_readlane_b32 s38, v18, 4 +; SI-NEXT: v_readlane_b32 s37, v18, 3 +; SI-NEXT: v_readlane_b32 s36, v18, 2 +; SI-NEXT: v_readlane_b32 s35, v18, 1 +; SI-NEXT: v_readlane_b32 s34, v18, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v18, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -28453,14 +28761,15 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: 
v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -28478,7 +28787,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB41_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -28664,6 +28972,7 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -28694,14 +29003,13 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 
; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -28746,10 +29054,11 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -28767,7 +29076,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB41_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -28893,6 +29201,7 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -28923,10 +29232,9 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: 
v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -31232,6 +31540,22 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v60i16_to_v15i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -31263,22 +31587,6 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 
; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -31448,90 +31756,167 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -31909,6 +32294,22 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v15i64_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded 
Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -31943,22 +32344,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -31992,7 +32377,7 @@ define <60 x half> 
@bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -32623,7 +33008,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v15i64_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -32636,6 +33020,7 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -32844,7 +33229,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15i64_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -32857,6 +33241,7 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: 
buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -33281,6 +33666,19 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; SI-LABEL: bitcast_v15i64_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 @@ -33299,19 +33697,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -33828,14 +34213,15 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 
vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -33853,7 +34239,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB45_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -34039,6 +34424,7 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -34069,14 +34455,13 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -34121,10 +34506,11 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword 
v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -34142,7 +34528,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB45_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -34268,6 +34653,7 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -34298,10 +34684,9 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -36999,6 +37384,22 @@ define inreg <15 
x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v15i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -37030,22 +37431,6 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -37217,90 +37602,167 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -37678,7 +38140,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v15f64_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -37692,6 +38153,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -37721,7 +38183,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -38019,7 +38481,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v15f64_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -38032,6 +38493,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -38225,7 +38687,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15f64_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -38238,6 +38699,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -38601,22 +39063,6 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; SI-LABEL: bitcast_v15f64_to_v60i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_mov_b32_e32 v27, s16 -; SI-NEXT: v_mov_b32_e32 v28, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; SI-NEXT: v_mov_b32_e32 v25, s20 -; SI-NEXT: v_mov_b32_e32 v26, s21 -; SI-NEXT: v_mov_b32_e32 v23, s22 -; SI-NEXT: v_mov_b32_e32 v24, s23 -; SI-NEXT: v_mov_b32_e32 v21, s24 -; SI-NEXT: v_mov_b32_e32 v22, s25 -; SI-NEXT: v_mov_b32_e32 v19, s26 -; SI-NEXT: v_mov_b32_e32 v20, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v17, s28 -; SI-NEXT: v_mov_b32_e32 v18, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -38633,6 +39079,22 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: v_mov_b32_e32 v27, s16 +; SI-NEXT: v_mov_b32_e32 v28, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v25, s20 +; SI-NEXT: v_mov_b32_e32 v26, s21 +; SI-NEXT: v_mov_b32_e32 v23, s22 +; SI-NEXT: v_mov_b32_e32 v24, s23 +; SI-NEXT: v_mov_b32_e32 v21, s24 +; SI-NEXT: v_mov_b32_e32 v22, s25 +; SI-NEXT: v_mov_b32_e32 v19, s26 +; SI-NEXT: v_mov_b32_e32 v20, s27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_mov_b32_e32 v17, s28 +; SI-NEXT: v_mov_b32_e32 v18, s29 ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[31:32], v[15:16], 16 @@ -38959,6 +39421,18 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; VI-LABEL: 
bitcast_v15f64_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v17, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -38975,18 +39449,6 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v20, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 
offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -39193,6 +39655,18 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; GFX9-LABEL: bitcast_v15f64_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v17, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -39209,18 +39683,6 @@ define inreg <60 x i16> 
@bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v20, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -41880,6 +42342,22 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX9-LABEL: bitcast_v60i16_to_v15f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -41911,22 +42389,6 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], 
s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -42096,90 +42558,167 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 
offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 
offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, 
s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, 
s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -42557,6 +43096,22 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v15f64_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr38 ; SI-NEXT: ; kill: killed $vgpr38 @@ -42579,22 +43134,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr38 ; SI-NEXT: ; kill: killed $vgpr38 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword 
v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr55 @@ -42627,7 +43166,7 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; kill: killed $vgpr38 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -43240,7 +43779,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v15f64_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -43253,6 +43791,7 @@ define <60 x 
half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -43446,7 +43985,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15f64_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -43459,6 +43997,7 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -43822,6 +44361,22 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; SI-LABEL: bitcast_v15f64_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte 
Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 @@ -43840,22 +44395,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, 
s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s44, s5, 16 @@ -44378,6 +44917,18 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; VI-LABEL: bitcast_v15f64_to_v60f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, 
off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v17, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -44394,18 +44945,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v20, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -44612,6 +45151,18 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; GFX9-LABEL: bitcast_v15f64_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, 
off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v17, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -44628,18 +45179,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v20, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 
; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -47691,6 +48230,22 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX9-LABEL: bitcast_v60f16_to_v15f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -47722,22 +48277,6 @@ define inreg <15 x double> 
@bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -47909,90 +48448,167 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: 
s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -51092,6 +51708,18 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v60i16_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: s_lshr_b32 s43, 
s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -51124,18 +51752,6 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -53772,6 +54388,18 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; VI-LABEL: bitcast_v60f16_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -53804,18 +54432,6 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v30, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded 
Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -54008,6 +54624,18 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -54040,18 +54668,6 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll index 36e2db0c4879d..a4882f1119e70 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll @@ -420,6 +420,7 @@ define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: s_clause 0x1 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 +; GISEL-GFX11-NEXT: ; meta instruction ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 @@ -461,6 +462,7 @@ define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; DAGISEL-GFX11-NEXT: s_clause 0x1 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 +; DAGISEL-GFX11-NEXT: ; meta instruction ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 ; 
DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 0329f23ea434f..954812c09d19a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -118,32 +118,32 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: 
s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 ; CHECK-NEXT: s_mov_b32 s50, s15 @@ -177,21 +177,21 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; 
CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -258,30 +258,30 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: 
v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v42, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v3 ; CHECK-NEXT: v_mov_b32_e32 v40, v2 @@ -313,20 +313,20 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; 
CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -400,32 +400,32 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: 
v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 ; CHECK-NEXT: s_mov_b32 s50, s15 @@ -459,21 +459,21 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: 
v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -542,30 +542,30 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v42, s16, 14 -; CHECK-NEXT: v_writelane_b32 v42, s30, 0 -; CHECK-NEXT: v_writelane_b32 v42, s31, 1 -; CHECK-NEXT: v_writelane_b32 v42, s34, 2 -; CHECK-NEXT: v_writelane_b32 v42, s35, 3 -; CHECK-NEXT: v_writelane_b32 v42, s36, 4 -; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s38, 6 -; CHECK-NEXT: v_writelane_b32 v42, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v42, s48, 8 -; CHECK-NEXT: v_writelane_b32 v42, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v42, s34, 0 +; CHECK-NEXT: v_writelane_b32 v42, s35, 1 +; CHECK-NEXT: v_writelane_b32 v42, s36, 2 +; CHECK-NEXT: v_writelane_b32 v42, s37, 3 +; CHECK-NEXT: v_writelane_b32 v42, s38, 4 +; CHECK-NEXT: v_writelane_b32 v42, s39, 5 +; CHECK-NEXT: v_writelane_b32 v42, s48, 6 +; CHECK-NEXT: v_writelane_b32 v42, s49, 7 +; CHECK-NEXT: 
v_writelane_b32 v42, s50, 8 +; CHECK-NEXT: v_writelane_b32 v42, s51, 9 +; CHECK-NEXT: v_writelane_b32 v42, s52, 10 +; CHECK-NEXT: v_writelane_b32 v42, s53, 11 +; CHECK-NEXT: v_writelane_b32 v42, s30, 12 +; CHECK-NEXT: v_writelane_b32 v42, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v42, s50, 10 -; CHECK-NEXT: v_writelane_b32 v42, s51, 11 -; CHECK-NEXT: v_writelane_b32 v42, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v42, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -596,20 +596,20 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v42, 13 -; CHECK-NEXT: v_readlane_b32 s52, v42, 12 -; CHECK-NEXT: v_readlane_b32 s51, v42, 11 -; CHECK-NEXT: v_readlane_b32 s50, v42, 10 -; CHECK-NEXT: v_readlane_b32 s49, v42, 9 -; CHECK-NEXT: v_readlane_b32 s48, v42, 8 -; CHECK-NEXT: v_readlane_b32 s39, v42, 7 -; CHECK-NEXT: v_readlane_b32 s38, v42, 6 -; CHECK-NEXT: v_readlane_b32 s37, v42, 5 -; CHECK-NEXT: v_readlane_b32 s36, v42, 4 -; CHECK-NEXT: v_readlane_b32 s35, v42, 3 -; CHECK-NEXT: v_readlane_b32 s34, v42, 2 -; CHECK-NEXT: v_readlane_b32 s31, v42, 1 -; CHECK-NEXT: v_readlane_b32 s30, v42, 0 +; CHECK-NEXT: v_readlane_b32 s30, v42, 12 +; CHECK-NEXT: v_readlane_b32 s31, v42, 13 
+; CHECK-NEXT: v_readlane_b32 s53, v42, 11 +; CHECK-NEXT: v_readlane_b32 s52, v42, 10 +; CHECK-NEXT: v_readlane_b32 s51, v42, 9 +; CHECK-NEXT: v_readlane_b32 s50, v42, 8 +; CHECK-NEXT: v_readlane_b32 s49, v42, 7 +; CHECK-NEXT: v_readlane_b32 s48, v42, 6 +; CHECK-NEXT: v_readlane_b32 s39, v42, 5 +; CHECK-NEXT: v_readlane_b32 s38, v42, 4 +; CHECK-NEXT: v_readlane_b32 s37, v42, 3 +; CHECK-NEXT: v_readlane_b32 s36, v42, 2 +; CHECK-NEXT: v_readlane_b32 s35, v42, 1 +; CHECK-NEXT: v_readlane_b32 s34, v42, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v42, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -683,32 +683,32 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; 
CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v41, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -741,21 +741,21 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 
-; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll index 583b6fe0a81ca..d4b07768e92a2 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll @@ -205,17 +205,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v3, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v3, s30, 0 -; GFX8-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX8-NEXT: v_readlane_b32 s31, v3, 1 ; 
GFX8-NEXT: v_readlane_b32 s30, v3, 0 +; GFX8-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -233,17 +233,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s3, s33, 8 ; GFX8-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s3 ; 4-byte Folded Spill ; GFX8-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1] +; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX8-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX8-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX8-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX8-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s3, s33, 8 @@ -261,17 +261,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[16:17] +; GFX9-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[16:17] ; GFX9-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX9-NEXT: v_writelane_b32 v3, s30, 0 -; GFX9-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -288,17 +288,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX9-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX9-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s33 ; 4-byte Folded Spill ; GFX9-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX9-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX9-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX9-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX9-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX9-ARCH-FLAT-NEXT: scratch_load_dword v3, off, s33 ; 4-byte Folded Reload @@ -315,17 +315,18 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX942-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s33 ; 4-byte Folded Spill ; GFX942-ARCH-FLAT-NEXT: s_mov_b64 exec, 
s[0:1] +; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX942-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX942-ARCH-FLAT-NEXT: s_nop 0 +; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX942-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX942-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX942-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX942-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX942-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX942-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-ARCH-FLAT-NEXT: scratch_load_dword v3, off, s33 ; 4-byte Folded Reload @@ -343,17 +344,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX10-NEXT: v_readlane_b32 s31, v3, 1 ; GFX10-NEXT: v_readlane_b32 s30, v3, 0 +; GFX10-NEXT: v_readlane_b32 s31, v3, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: 
s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index 4f1a6cb2c48d8..ceb271bd57233 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -496,17 +496,29 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 4352 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 2816 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 2560 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 2304 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2048 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 1792 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, 
$exec, 64, 1536 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1280 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1024 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 768 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 512 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 256 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 0 ; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec ; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 711d57baac15f..35b9d9d4996da 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -4392,8 +4392,8 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, 
v0 ; GCN-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v2, 1 ; GCN-NEXT: v_readlane_b32 s30, v2, 0 +; GCN-NEXT: v_readlane_b32 s31, v2, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4410,21 +4410,21 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v2, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v2, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v2, s30, 0 -; GFX7-NEXT: v_writelane_b32 v2, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_readlane_b32 s30, v2, 0 ; GFX7-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v2, 1 -; GFX7-NEXT: v_readlane_b32 s30, v2, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4441,19 +4441,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v2, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX8-NEXT: 
s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v2, s30, 0 -; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v2, 1 -; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4470,19 +4470,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v2, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v2, s30, 0 -; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v2, 1 -; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4499,19 +4499,20 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v4, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; 
GFX950-NEXT: v_writelane_b32 v4, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v4, s30, 0 -; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: scratch_store_short v1, v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v4, 1 -; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v4, off, s33 ; 4-byte Folded Reload @@ -4529,19 +4530,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ 
-4559,19 +4560,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: scratch_store_b16 v1, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -4590,18 +4591,18 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v4, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v4, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: scratch_store_b16 v1, v0, off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v4, 1 -; 
GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -4645,8 +4646,8 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v2, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v4, 1 ; GCN-NEXT: v_readlane_b32 s30, v4, 0 +; GCN-NEXT: v_readlane_b32 s31, v4, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4663,26 +4664,26 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v4, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v4, s30, 0 -; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_add_i32_e32 v3, vcc, 2, v2 +; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v4, 1 -; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v4, 
off, s[0:3], s33 ; 4-byte Folded Reload @@ -4699,19 +4700,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v2, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v2, s30, 0 -; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v2, 1 -; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4728,19 +4729,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v2, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v2, s30, 0 -; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; 
GFX900-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v2, 1 -; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4757,19 +4758,20 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v4, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v4, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v4, s30, 0 -; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: scratch_store_dword v1, v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v4, 1 -; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v4, off, s33 ; 4-byte Folded Reload @@ -4787,19 +4789,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, 
test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4817,19 +4819,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: scratch_store_b32 v1, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -4848,18 +4850,18 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; 
GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v4, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v4, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: scratch_store_b32 v1, v0, off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v4, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -4905,8 +4907,8 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, v3, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v5, 1 ; GCN-NEXT: v_readlane_b32 s30, v5, 0 +; GCN-NEXT: v_readlane_b32 s31, v5, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4923,13 +4925,13 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v4, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v4, s30, 0 -; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; 
GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 @@ -4939,12 +4941,12 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v2 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 4, v3 +; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_dword v0, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v4, 1 -; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4961,22 +4963,22 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v4, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v4, s30, 0 -; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v2 +; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: buffer_store_short v1, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v4, 1 -; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v4, off, 
s[0:3], s33 ; 4-byte Folded Reload @@ -4993,21 +4995,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v3, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v3, s30, 0 -; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v3, 1 -; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5024,22 +5026,23 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: 
v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: v_mov_b32_e32 v4, v2 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_short v4, v1, off offset:4 sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: scratch_store_dword v4, v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5057,21 +5060,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v3, 1 -; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5089,21 +5092,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: 
scratch_store_b32 off, v3, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: scratch_store_b16 v2, v1, off offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 v2, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v3, 1 -; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v3, off, s33 ; 4-byte Folded Reload @@ -5122,21 +5125,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 -; GFX1250-NEXT: v_mov_b32_e32 v4, v2 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: v_mov_b32_e32 v4, v2 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b16 v4, v1, off offset:4 
scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: scratch_store_b32 v4, v0, off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5190,8 +5193,8 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v4, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v8, 1 ; GCN-NEXT: v_readlane_b32 s30, v8, 0 +; GCN-NEXT: v_readlane_b32 s31, v8, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5208,13 +5211,13 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v6, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v6, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v6, s30, 0 -; GFX7-NEXT: v_writelane_b32 v6, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 @@ -5231,13 +5234,13 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v4 +; GFX7-NEXT: v_readlane_b32 s30, v6, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: 
s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v4, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v6, 1 -; GFX7-NEXT: v_readlane_b32 s30, v6, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5254,22 +5257,22 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v4, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v4, s30, 0 -; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v2 +; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v4, 1 -; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5286,21 +5289,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v3, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_getpc_b64 
s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v3, s30, 0 -; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v3, 1 -; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5317,20 +5320,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: v_mov_b32_e32 v4, v2 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: 
scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5348,21 +5352,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v3, 1 -; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5380,19 +5384,19 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v3, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: scratch_store_b64 v2, v[0:1], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v3, 1 -; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v3, off, s33 ; 4-byte Folded Reload @@ -5411,19 +5415,19 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 -; GFX1250-NEXT: v_mov_b32_e32 v4, v2 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: v_mov_b32_e32 v4, v2 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5497,8 +5501,8 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v8, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v16, 1 ; GCN-NEXT: v_readlane_b32 s30, v16, 0 +; GCN-NEXT: v_readlane_b32 s31, v16, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: 
buffer_load_dword v16, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5515,13 +5519,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v10, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v10, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v10, s30, 0 -; GFX7-NEXT: v_writelane_b32 v10, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7 @@ -5558,13 +5562,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v8 +; GFX7-NEXT: v_readlane_b32 s30, v10, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v8, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v10, 1 -; GFX7-NEXT: v_readlane_b32 s30, v10, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5581,13 +5585,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v6, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v6, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; 
GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v6, s30, 0 -; GFX8-NEXT: v_writelane_b32 v6, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 12, v4 @@ -5597,12 +5601,12 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 4, v4 +; GFX8-NEXT: v_readlane_b32 s30, v6, 0 ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v6, 1 -; GFX8-NEXT: v_readlane_b32 s30, v6, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5619,15 +5623,16 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v5, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v5, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v5, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v5, s30, 0 -; GFX900-NEXT: v_writelane_b32 v5, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v5, 0 ; GFX900-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:12 ; GFX900-NEXT: s_waitcnt 
vmcnt(0) ; GFX900-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:8 @@ -5637,7 +5642,6 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v5, 1 -; GFX900-NEXT: v_readlane_b32 s30, v5, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5654,19 +5658,20 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_dwordx4 v4, v[0:3], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5684,15 +5689,16 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v5, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; 
GFX10-NEXT: v_writelane_b32 v5, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v5, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v5, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v5, 0 ; GFX10-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:12 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:8 @@ -5702,7 +5708,6 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v5, 1 -; GFX10-NEXT: v_readlane_b32 s30, v5, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5720,19 +5725,19 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v5, 0 ; GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v5, 1 -; 
GFX11-NEXT: v_readlane_b32 s30, v5, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v5, off, s33 ; 4-byte Folded Reload @@ -5751,18 +5756,18 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b128 v4, v[0:3], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5876,8 +5881,8 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v16, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v20, 1 ; GCN-NEXT: v_readlane_b32 s30, v20, 0 +; GCN-NEXT: v_readlane_b32 s31, v20, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5894,13 +5899,13 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v18, off, s[0:3], s33 ; 4-byte Folded Spill ; 
GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v18, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v18, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v18, s30, 0 -; GFX7-NEXT: v_writelane_b32 v18, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15 @@ -5977,13 +5982,13 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v16 +; GFX7-NEXT: v_readlane_b32 s30, v18, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v16, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v18, 1 -; GFX7-NEXT: v_readlane_b32 s30, v18, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v18, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6000,13 +6005,13 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v10, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v10, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v10, s30, 0 -; GFX8-NEXT: v_writelane_b32 v10, s31, 
1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v9, vcc, 28, v8 @@ -6028,12 +6033,12 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 4, v8 +; GFX8-NEXT: v_readlane_b32 s30, v10, 0 ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v8, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v10, 1 -; GFX8-NEXT: v_readlane_b32 s30, v10, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6050,15 +6055,16 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v9, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v9, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v9, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v9, s30, 0 -; GFX900-NEXT: v_writelane_b32 v9, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v9, 0 ; GFX900-NEXT: buffer_store_dword v7, v8, s[0:3], 0 offen offset:28 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v6, v8, s[0:3], 0 offen offset:24 @@ -6076,7 +6082,6 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: buffer_store_dword v0, v8, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; 
GFX900-NEXT: v_readlane_b32 s31, v9, 1 -; GFX900-NEXT: v_readlane_b32 s30, v9, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v9, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6093,21 +6098,22 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v9, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v9, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v9, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v9, s30, 0 -; GFX950-NEXT: v_writelane_b32 v9, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v9, 0 ; GFX950-NEXT: scratch_store_dwordx4 v8, v[4:7], off offset:16 sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: scratch_store_dwordx4 v8, v[0:3], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v9, 1 -; GFX950-NEXT: v_readlane_b32 s30, v9, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v9, off, s33 ; 4-byte Folded Reload @@ -6125,15 +6131,16 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v9, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v9, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v9, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: 
s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v9, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v9, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v9, 0 ; GFX10-NEXT: buffer_store_dword v7, v8, s[0:3], 0 offen offset:28 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v6, v8, s[0:3], 0 offen offset:24 @@ -6151,7 +6158,6 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v0, v8, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v9, 1 -; GFX10-NEXT: v_readlane_b32 s30, v9, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6169,21 +6175,21 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v9, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v9, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v9, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v9, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v9, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v9, 0 ; GFX11-NEXT: scratch_store_b128 v8, v[4:7], off offset:16 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b128 v8, v[0:3], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v9, 1 -; GFX11-NEXT: v_readlane_b32 s30, v9, 0 ; GFX11-NEXT: 
s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v9, off, s33 ; 4-byte Folded Reload @@ -6202,20 +6208,20 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v9, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v9, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v9, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v9, 0 ; GFX1250-NEXT: scratch_store_b128 v8, v[4:7], off offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: scratch_store_b128 v8, v[0:3], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v9, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v9, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -9518,6 +9524,17 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX8-LABEL: global_extload_v32bf16_to_v32f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:24 ; 4-byte 
Folded Spill +; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v1 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v1 @@ -9552,17 +9569,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_addc_u32_e32 v34, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v35, vcc, 36, v1 ; GFX8-NEXT: v_addc_u32_e32 v36, vcc, 0, v2, vcc -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX8-NEXT: v_add_u32_e32 v37, vcc, 38, v1 ; GFX8-NEXT: flat_load_ushort v44, v[1:2] ; GFX8-NEXT: v_addc_u32_e32 v38, vcc, 0, v2, vcc @@ -10021,16 
+10027,21 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-LABEL: global_extload_v32bf16_to_v32f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v3, v2 -; GFX950-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-NEXT: v_mov_b32_e32 v2, v1 ; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:2 ; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:12 ; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:8 @@ -10063,11 +10074,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:48 ; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:54 ; GFX950-NEXT: global_load_ushort v58, v[2:3], off offset:58 -; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(31) ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX950-NEXT: s_waitcnt 
vmcnt(30) @@ -14251,12 +14257,12 @@ define <32 x bfloat> @v_fadd_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_fadd_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -19959,12 +19965,12 @@ define <32 x bfloat> @v_fmul_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_fmul_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -25150,12 +25156,12 @@ define <32 x bfloat> @v_minnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_minnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; 
GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -29726,12 +29732,12 @@ define <32 x bfloat> @v_maxnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_maxnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -48801,6 +48807,14 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v34, s34, 0 +; GFX8-NEXT: v_writelane_b32 v34, s35, 1 +; GFX8-NEXT: v_writelane_b32 v34, s36, 2 +; GFX8-NEXT: v_writelane_b32 v34, s37, 3 +; GFX8-NEXT: v_writelane_b32 v34, s38, 4 +; GFX8-NEXT: v_writelane_b32 v34, s39, 5 +; GFX8-NEXT: v_writelane_b32 v34, s30, 6 +; GFX8-NEXT: v_writelane_b32 v34, s31, 7 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v1 @@ -48852,26 +48866,18 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_and_b32_e32 v0, 1, v24 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v25 -; GFX8-NEXT: v_writelane_b32 v34, s30, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v26 -; GFX8-NEXT: v_writelane_b32 v34, s31, 1 ; GFX8-NEXT: 
v_cmp_eq_u32_e64 s[88:89], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v27 -; GFX8-NEXT: v_writelane_b32 v34, s34, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v28 -; GFX8-NEXT: v_writelane_b32 v34, s35, 3 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v29 -; GFX8-NEXT: v_writelane_b32 v34, s36, 4 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v30 -; GFX8-NEXT: v_writelane_b32 v34, s37, 5 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 ; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX8-NEXT: v_writelane_b32 v34, s38, 6 -; GFX8-NEXT: v_writelane_b32 v34, s39, 7 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 @@ -48997,6 +49003,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v28 ; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v26 ; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v24 +; GFX8-NEXT: v_readlane_b32 s30, v34, 6 ; GFX8-NEXT: v_or_b32_sdwa v8, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v9, v18, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v10, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -49005,14 +49012,13 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_or_b32_sdwa v13, v29, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v14, v27, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v15, v25, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_readlane_b32 s39, v34, 7 -; GFX8-NEXT: v_readlane_b32 s38, v34, 6 -; GFX8-NEXT: v_readlane_b32 s37, v34, 5 -; GFX8-NEXT: v_readlane_b32 s36, v34, 4 -; GFX8-NEXT: 
v_readlane_b32 s35, v34, 3 -; GFX8-NEXT: v_readlane_b32 s34, v34, 2 -; GFX8-NEXT: v_readlane_b32 s31, v34, 1 -; GFX8-NEXT: v_readlane_b32 s30, v34, 0 +; GFX8-NEXT: v_readlane_b32 s31, v34, 7 +; GFX8-NEXT: v_readlane_b32 s39, v34, 5 +; GFX8-NEXT: v_readlane_b32 s38, v34, 4 +; GFX8-NEXT: v_readlane_b32 s37, v34, 3 +; GFX8-NEXT: v_readlane_b32 s36, v34, 2 +; GFX8-NEXT: v_readlane_b32 s35, v34, 1 +; GFX8-NEXT: v_readlane_b32 s34, v34, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] @@ -49025,6 +49031,10 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v33, s34, 0 +; GFX900-NEXT: v_writelane_b32 v33, s35, 1 +; GFX900-NEXT: v_writelane_b32 v33, s30, 2 +; GFX900-NEXT: v_writelane_b32 v33, s31, 3 ; GFX900-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX900-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 ; GFX900-NEXT: v_and_b32_e32 v0, 1, v3 @@ -49084,11 +49094,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_and_b32_e32 v0, 1, v28 ; GFX900-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0 ; GFX900-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX900-NEXT: v_writelane_b32 v33, s30, 0 -; GFX900-NEXT: v_writelane_b32 v33, s31, 1 -; GFX900-NEXT: v_writelane_b32 v33, s34, 2 ; GFX900-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX900-NEXT: v_writelane_b32 v33, s35, 3 ; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_and_b32_e32 v0, 1, v0 @@ -49193,6 +49199,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_mov_b32 s4, 0x5040100 
+; GFX900-NEXT: v_readlane_b32 s30, v33, 2 ; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 ; GFX900-NEXT: v_perm_b32 v1, v2, v5, s4 ; GFX900-NEXT: v_perm_b32 v2, v4, v7, s4 @@ -49209,10 +49216,9 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_perm_b32 v13, v26, v29, s4 ; GFX900-NEXT: v_perm_b32 v14, v28, v32, s4 ; GFX900-NEXT: v_perm_b32 v15, v31, v30, s4 -; GFX900-NEXT: v_readlane_b32 s35, v33, 3 -; GFX900-NEXT: v_readlane_b32 s34, v33, 2 -; GFX900-NEXT: v_readlane_b32 s31, v33, 1 -; GFX900-NEXT: v_readlane_b32 s30, v33, 0 +; GFX900-NEXT: v_readlane_b32 s31, v33, 3 +; GFX900-NEXT: v_readlane_b32 s35, v33, 1 +; GFX900-NEXT: v_readlane_b32 s34, v33, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -49228,6 +49234,12 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 offset:60 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:124 ; GFX950-NEXT: scratch_load_ushort v33, off, s32 @@ -49252,17 +49264,11 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:40 ; GFX950-NEXT: v_and_b32_e32 v29, 1, v29 -; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload 
Reuse ; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v29 ; GFX950-NEXT: scratch_load_dword v29, off, s32 offset:84 ; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:20 ; GFX950-NEXT: v_and_b32_e32 v28, 1, v28 -; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v28 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v26, 1, v26 ; GFX950-NEXT: v_and_b32_e32 v27, 1, v27 ; GFX950-NEXT: v_and_b32_e32 v24, 1, v24 @@ -54681,6 +54687,22 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-LABEL: v_fma_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:64 ; GFX950-NEXT: scratch_load_dword v36, off, s32 ; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:60 
@@ -54698,14 +54720,6 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:16 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:20 ; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:24 -; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v43, 0xffff0000, v14 ; GFX950-NEXT: v_lshlrev_b32_e32 v45, 16, v14 ; GFX950-NEXT: v_and_b32_e32 v46, 0xffff0000, v29 @@ -54714,20 +54728,12 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-NEXT: v_lshlrev_b32_e32 v61, 16, v12 ; GFX950-NEXT: v_and_b32_e32 v62, 0xffff0000, v27 ; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v30 ; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v30 ; GFX950-NEXT: v_and_b32_e32 v47, 0xffff0000, v13 ; GFX950-NEXT: v_lshlrev_b32_e32 v57, 16, v13 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v15 ; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v15 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v58, 0xffff0000, v28 ; GFX950-NEXT: v_lshlrev_b32_e32 
v60, 16, v28 ; GFX950-NEXT: s_waitcnt vmcnt(16) diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index ab2ad19d0f1bf..2f6f9e45cafbf 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -902,47 +902,47 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v0, s30, 0 -; CHECK-NEXT: v_writelane_b32 v0, s31, 1 -; CHECK-NEXT: v_writelane_b32 v0, s33, 2 -; CHECK-NEXT: v_writelane_b32 v0, s34, 3 -; CHECK-NEXT: v_writelane_b32 v0, s35, 4 -; CHECK-NEXT: v_writelane_b32 v0, s36, 5 -; CHECK-NEXT: v_writelane_b32 v0, s37, 6 -; CHECK-NEXT: v_writelane_b32 v0, s38, 7 -; CHECK-NEXT: v_writelane_b32 v0, s39, 8 -; CHECK-NEXT: v_writelane_b32 v0, s48, 9 -; CHECK-NEXT: v_writelane_b32 v0, s49, 10 -; CHECK-NEXT: v_writelane_b32 v0, s50, 11 -; CHECK-NEXT: v_writelane_b32 v0, s51, 12 -; CHECK-NEXT: v_writelane_b32 v0, s52, 13 -; CHECK-NEXT: v_writelane_b32 v0, s53, 14 -; CHECK-NEXT: v_writelane_b32 v0, s54, 15 -; CHECK-NEXT: v_writelane_b32 v0, s55, 16 -; CHECK-NEXT: v_writelane_b32 v0, s64, 17 -; CHECK-NEXT: v_writelane_b32 v0, s65, 18 -; CHECK-NEXT: v_writelane_b32 v0, s66, 19 -; CHECK-NEXT: v_writelane_b32 v0, s67, 20 -; CHECK-NEXT: v_writelane_b32 v0, s68, 21 -; CHECK-NEXT: v_writelane_b32 v0, s69, 22 -; CHECK-NEXT: v_writelane_b32 v0, s70, 23 -; CHECK-NEXT: v_writelane_b32 v0, s71, 24 -; CHECK-NEXT: v_writelane_b32 v0, s80, 25 -; CHECK-NEXT: v_writelane_b32 v0, s81, 26 -; CHECK-NEXT: v_writelane_b32 v0, s82, 27 -; CHECK-NEXT: v_writelane_b32 v0, s83, 28 -; CHECK-NEXT: v_writelane_b32 v0, s84, 29 -; CHECK-NEXT: v_writelane_b32 v0, s85, 30 -; CHECK-NEXT: v_writelane_b32 v0, s86, 31 -; CHECK-NEXT: v_writelane_b32 v0, s87, 32 -; CHECK-NEXT: v_writelane_b32 v0, s96, 33 -; CHECK-NEXT: 
v_writelane_b32 v0, s97, 34 -; CHECK-NEXT: v_writelane_b32 v0, s98, 35 -; CHECK-NEXT: v_writelane_b32 v0, s99, 36 +; CHECK-NEXT: v_writelane_b32 v0, s33, 0 +; CHECK-NEXT: v_writelane_b32 v0, s34, 1 +; CHECK-NEXT: v_writelane_b32 v0, s35, 2 +; CHECK-NEXT: v_writelane_b32 v0, s36, 3 +; CHECK-NEXT: v_writelane_b32 v0, s37, 4 +; CHECK-NEXT: v_writelane_b32 v0, s38, 5 +; CHECK-NEXT: v_writelane_b32 v0, s39, 6 +; CHECK-NEXT: v_writelane_b32 v0, s48, 7 +; CHECK-NEXT: v_writelane_b32 v0, s49, 8 +; CHECK-NEXT: v_writelane_b32 v0, s50, 9 +; CHECK-NEXT: v_writelane_b32 v0, s51, 10 +; CHECK-NEXT: v_writelane_b32 v0, s52, 11 +; CHECK-NEXT: v_writelane_b32 v0, s53, 12 +; CHECK-NEXT: v_writelane_b32 v0, s54, 13 +; CHECK-NEXT: v_writelane_b32 v0, s55, 14 +; CHECK-NEXT: v_writelane_b32 v0, s64, 15 +; CHECK-NEXT: v_writelane_b32 v0, s65, 16 +; CHECK-NEXT: v_writelane_b32 v0, s66, 17 +; CHECK-NEXT: v_writelane_b32 v0, s67, 18 +; CHECK-NEXT: v_writelane_b32 v0, s68, 19 +; CHECK-NEXT: v_writelane_b32 v0, s69, 20 +; CHECK-NEXT: v_writelane_b32 v0, s70, 21 +; CHECK-NEXT: v_writelane_b32 v0, s71, 22 +; CHECK-NEXT: v_writelane_b32 v0, s80, 23 +; CHECK-NEXT: v_writelane_b32 v0, s81, 24 +; CHECK-NEXT: v_writelane_b32 v0, s82, 25 +; CHECK-NEXT: v_writelane_b32 v0, s83, 26 +; CHECK-NEXT: v_writelane_b32 v0, s84, 27 +; CHECK-NEXT: v_writelane_b32 v0, s85, 28 +; CHECK-NEXT: v_writelane_b32 v0, s86, 29 +; CHECK-NEXT: v_writelane_b32 v0, s87, 30 +; CHECK-NEXT: v_writelane_b32 v0, s96, 31 +; CHECK-NEXT: v_writelane_b32 v0, s97, 32 +; CHECK-NEXT: v_writelane_b32 v0, s98, 33 +; CHECK-NEXT: v_writelane_b32 v0, s99, 34 +; CHECK-NEXT: v_writelane_b32 v0, s100, 35 +; CHECK-NEXT: v_writelane_b32 v0, s101, 36 +; CHECK-NEXT: v_writelane_b32 v0, s30, 37 +; CHECK-NEXT: v_writelane_b32 v0, s31, 38 ; CHECK-NEXT: s_mov_b32 s40, s12 -; CHECK-NEXT: v_writelane_b32 v0, s100, 37 ; CHECK-NEXT: s_cmp_eq_u32 s40, 0 -; CHECK-NEXT: v_writelane_b32 v0, s101, 38 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; 
CHECK-NEXT: ;;#ASMEND @@ -1380,6 +1380,7 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s31 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s30, v0, 37 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s32 ; CHECK-NEXT: ;;#ASMEND @@ -1596,45 +1597,44 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s101, v0, 38 -; CHECK-NEXT: v_readlane_b32 s100, v0, 37 -; CHECK-NEXT: v_readlane_b32 s99, v0, 36 -; CHECK-NEXT: v_readlane_b32 s98, v0, 35 -; CHECK-NEXT: v_readlane_b32 s97, v0, 34 -; CHECK-NEXT: v_readlane_b32 s96, v0, 33 -; CHECK-NEXT: v_readlane_b32 s87, v0, 32 -; CHECK-NEXT: v_readlane_b32 s86, v0, 31 -; CHECK-NEXT: v_readlane_b32 s85, v0, 30 -; CHECK-NEXT: v_readlane_b32 s84, v0, 29 -; CHECK-NEXT: v_readlane_b32 s83, v0, 28 -; CHECK-NEXT: v_readlane_b32 s82, v0, 27 -; CHECK-NEXT: v_readlane_b32 s81, v0, 26 -; CHECK-NEXT: v_readlane_b32 s80, v0, 25 -; CHECK-NEXT: v_readlane_b32 s71, v0, 24 -; CHECK-NEXT: v_readlane_b32 s70, v0, 23 -; CHECK-NEXT: v_readlane_b32 s69, v0, 22 -; CHECK-NEXT: v_readlane_b32 s68, v0, 21 -; CHECK-NEXT: v_readlane_b32 s67, v0, 20 -; CHECK-NEXT: v_readlane_b32 s66, v0, 19 -; CHECK-NEXT: v_readlane_b32 s65, v0, 18 -; CHECK-NEXT: v_readlane_b32 s64, v0, 17 -; CHECK-NEXT: v_readlane_b32 s55, v0, 16 -; CHECK-NEXT: v_readlane_b32 s54, v0, 15 -; CHECK-NEXT: v_readlane_b32 s53, v0, 14 -; CHECK-NEXT: v_readlane_b32 s52, v0, 13 -; CHECK-NEXT: v_readlane_b32 s51, v0, 12 -; CHECK-NEXT: v_readlane_b32 s50, v0, 11 -; CHECK-NEXT: v_readlane_b32 s49, v0, 10 -; CHECK-NEXT: v_readlane_b32 s48, v0, 9 -; CHECK-NEXT: v_readlane_b32 s39, v0, 8 -; CHECK-NEXT: v_readlane_b32 s38, v0, 7 -; CHECK-NEXT: v_readlane_b32 s37, v0, 6 -; CHECK-NEXT: v_readlane_b32 s36, v0, 5 -; CHECK-NEXT: v_readlane_b32 s35, v0, 4 -; CHECK-NEXT: v_readlane_b32 s34, v0, 3 -; CHECK-NEXT: v_readlane_b32 s33, v0, 2 
-; CHECK-NEXT: v_readlane_b32 s31, v0, 1 -; CHECK-NEXT: v_readlane_b32 s30, v0, 0 +; CHECK-NEXT: v_readlane_b32 s31, v0, 38 +; CHECK-NEXT: v_readlane_b32 s101, v0, 36 +; CHECK-NEXT: v_readlane_b32 s100, v0, 35 +; CHECK-NEXT: v_readlane_b32 s99, v0, 34 +; CHECK-NEXT: v_readlane_b32 s98, v0, 33 +; CHECK-NEXT: v_readlane_b32 s97, v0, 32 +; CHECK-NEXT: v_readlane_b32 s96, v0, 31 +; CHECK-NEXT: v_readlane_b32 s87, v0, 30 +; CHECK-NEXT: v_readlane_b32 s86, v0, 29 +; CHECK-NEXT: v_readlane_b32 s85, v0, 28 +; CHECK-NEXT: v_readlane_b32 s84, v0, 27 +; CHECK-NEXT: v_readlane_b32 s83, v0, 26 +; CHECK-NEXT: v_readlane_b32 s82, v0, 25 +; CHECK-NEXT: v_readlane_b32 s81, v0, 24 +; CHECK-NEXT: v_readlane_b32 s80, v0, 23 +; CHECK-NEXT: v_readlane_b32 s71, v0, 22 +; CHECK-NEXT: v_readlane_b32 s70, v0, 21 +; CHECK-NEXT: v_readlane_b32 s69, v0, 20 +; CHECK-NEXT: v_readlane_b32 s68, v0, 19 +; CHECK-NEXT: v_readlane_b32 s67, v0, 18 +; CHECK-NEXT: v_readlane_b32 s66, v0, 17 +; CHECK-NEXT: v_readlane_b32 s65, v0, 16 +; CHECK-NEXT: v_readlane_b32 s64, v0, 15 +; CHECK-NEXT: v_readlane_b32 s55, v0, 14 +; CHECK-NEXT: v_readlane_b32 s54, v0, 13 +; CHECK-NEXT: v_readlane_b32 s53, v0, 12 +; CHECK-NEXT: v_readlane_b32 s52, v0, 11 +; CHECK-NEXT: v_readlane_b32 s51, v0, 10 +; CHECK-NEXT: v_readlane_b32 s50, v0, 9 +; CHECK-NEXT: v_readlane_b32 s49, v0, 8 +; CHECK-NEXT: v_readlane_b32 s48, v0, 7 +; CHECK-NEXT: v_readlane_b32 s39, v0, 6 +; CHECK-NEXT: v_readlane_b32 s38, v0, 5 +; CHECK-NEXT: v_readlane_b32 s37, v0, 4 +; CHECK-NEXT: v_readlane_b32 s36, v0, 3 +; CHECK-NEXT: v_readlane_b32 s35, v0, 2 +; CHECK-NEXT: v_readlane_b32 s34, v0, 1 +; CHECK-NEXT: v_readlane_b32 s33, v0, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index 1e04fc1da938f..445250d4e77e4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -41,16 +41,16 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i8_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i8_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -71,14 +71,14 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -101,16 +101,16 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -131,14 +131,14 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -161,16 +161,16 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -191,14 +191,14 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -221,17 +221,17 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, 
external_void_func_i64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -252,14 +252,14 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -282,17 +282,17 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: 
s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -313,14 +313,14 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -343,18 +343,18 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s19, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -375,14 +375,14 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s3, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -405,8 +405,9 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12 @@ -414,10 +415,9 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -438,14 +438,14 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -468,8 +468,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[26:27] ; GFX9-NEXT: v_writelane_b32 v40, s24, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[24:25] ; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12 @@ -481,10 +482,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX9-NEXT: s_mov_b32 s17, s21 ; GFX9-NEXT: s_mov_b32 s18, s22 ; GFX9-NEXT: s_mov_b32 s19, s23 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -505,14 +505,14 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s20, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[20:21] ; GFX11-NEXT: s_add_u32 s20, s20, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s21, s21, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -535,16 +535,16 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 
s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -565,14 +565,14 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -595,16 +595,16 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -625,14 +625,14 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; 
GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -655,16 +655,16 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -685,14 +685,14 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, 
external_void_func_f32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -715,17 +715,17 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -746,14 +746,14 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; 
GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -776,16 +776,16 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -806,14 +806,14 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -837,16 +837,16 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -867,14 +867,14 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -897,17 +897,17 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -928,14 +928,14 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -958,17 +958,17 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -989,14 +989,14 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1019,17 +1019,17 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: 
s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1050,14 +1050,14 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p0_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p0_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1080,17 +1080,17 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 
s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1111,14 +1111,14 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1141,16 +1141,16 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p3_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p3_inreg@rel32@hi+12 ; 
GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1171,14 +1171,14 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p3_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p3_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1201,8 +1201,9 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12 @@ -1210,10 +1211,9 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: 
s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1234,14 +1234,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v2p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v2p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1264,17 +1264,17 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, 
v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1295,14 +1295,14 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2p5_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2p5_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1325,8 +1325,9 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[22:23] ; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 @@ -1335,10 +1336,9 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 ; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1359,14 +1359,14 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1389,8 +1389,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[40:41] ; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12 @@ -1407,10 +1408,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_mov_b32 s22, s26 ; GFX9-NEXT: s_mov_b32 s23, s27 ; GFX9-NEXT: s_mov_b32 s24, s28 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1431,14 +1431,14 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX11-NEXT: v_writelane_b32 v40, s25, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[26:27] ; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1463,8 +1463,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[22:23] ; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 @@ -1482,10 +1483,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX9-NEXT: s_mov_b32 s11, s18 ; GFX9-NEXT: s_mov_b32 s15, s19 ; GFX9-NEXT: s_mov_b32 
s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1506,14 +1506,14 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 2f2d2005ea2ae..4e0b16792aad4 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -5853,7 +5853,9 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] ; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 11 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 12 @@ -5863,7 +5865,6 @@ define void 
@stack_12xv3i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 14 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 15 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -5899,10 +5900,9 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 9 ; VI-NEXT: v_mov_b32_e32 v29, 9 ; VI-NEXT: v_mov_b32_e32 v30, 10 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -5921,7 +5921,9 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 11 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 12 @@ -5931,7 +5933,6 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 14 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 15 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -5967,10 +5968,9 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 9 ; CI-NEXT: v_mov_b32_e32 v29, 9 ; CI-NEXT: v_mov_b32_e32 v30, 10 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 
; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -5989,7 +5989,9 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 11 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 @@ -5999,7 +6001,6 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -6035,10 +6036,9 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 9 ; GFX9-NEXT: v_mov_b32_e32 v29, 9 ; GFX9-NEXT: v_mov_b32_e32 v30, 10 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6057,11 +6057,12 @@ define void @stack_12xv3i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12 ; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14 ; GFX11-NEXT: v_mov_b32_e32 v4, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, 
s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 @@ -6084,11 +6085,10 @@ define void @stack_12xv3i32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6107,7 +6107,9 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 11 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 12 @@ -6117,7 +6119,6 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 14 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 15 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -6153,10 +6154,9 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 9 ; HSA-NEXT: v_mov_b32_e32 v29, 9 ; HSA-NEXT: v_mov_b32_e32 v30, 10 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: 
v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6192,7 +6192,9 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] ; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6202,7 +6204,6 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6238,10 +6239,9 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v30, 0x41200000 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6260,7 +6260,9 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6270,7 +6272,6 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6306,10 +6307,9 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v30, 0x41200000 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6328,7 +6328,9 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6338,7 +6340,6 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6374,10 +6375,9 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, 
v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6396,13 +6396,14 @@ define void @stack_12xv3f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 @@ -6427,11 +6428,10 @@ define void @stack_12xv3f32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6450,7 +6450,9 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 
v0, 0x41400000 @@ -6460,7 +6462,6 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6496,10 +6497,9 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 0x41100000 ; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000 ; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6535,7 +6535,9 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] ; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 7 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 8 @@ -6553,7 +6555,6 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 14 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 15 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6589,10 +6590,9 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 5 ; VI-NEXT: v_mov_b32_e32 v29, 5 ; VI-NEXT: v_mov_b32_e32 v30, 6 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; 
VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6611,7 +6611,9 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 7 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 @@ -6629,7 +6631,6 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 14 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 15 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6665,10 +6666,9 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 5 ; CI-NEXT: v_mov_b32_e32 v29, 5 ; CI-NEXT: v_mov_b32_e32 v30, 6 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6687,7 +6687,9 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 7 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 @@ -6705,7 +6707,6 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6741,10 +6742,9 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 5 ; GFX9-NEXT: v_mov_b32_e32 v29, 5 ; GFX9-NEXT: v_mov_b32_e32 v30, 6 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6763,15 +6763,16 @@ define void @stack_8xv5i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8 ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10 ; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14 ; GFX11-NEXT: v_mov_b32_e32 v6, 13 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 @@ -6795,11 +6796,10 @@ define void @stack_8xv5i32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; 
GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6818,7 +6818,9 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 7 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 8 @@ -6836,7 +6838,6 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 14 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 15 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6872,10 +6873,9 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 5 ; HSA-NEXT: v_mov_b32_e32 v29, 5 ; HSA-NEXT: v_mov_b32_e32 v30, 6 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6907,7 +6907,9 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] ; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; VI-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -6925,7 +6927,6 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -6961,10 +6962,9 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6983,7 +6983,9 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -7001,7 +7003,6 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7037,10 +7038,9 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; CI-NEXT: 
v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -7059,7 +7059,9 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -7077,7 +7079,6 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7113,10 +7114,9 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -7135,19 +7135,20 @@ define void @stack_8xv5f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000 ; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000 ; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v8, s0 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 @@ -7170,11 +7171,10 @@ define void @stack_8xv5f32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7193,7 +7193,9 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -7211,7 +7213,6 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: 
v_mov_b32_e32 v0, 0x41700000 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7247,10 +7248,9 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 4df10497bcd27..cdec3b6751e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -20,8 +20,8 @@ define void @use_vcc() #1 { ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s31, v40, 1 ; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s31, v40, 1 ; GCN: v_readlane_b32 s4, v40, 2 ; GCN: s_mov_b32 s33, s4 ; GCN: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index 61a195f9c314f..8c0991fd32849 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -36,11 +36,11 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; 
MUBUF-NEXT: v_writelane_b32 v40, s34, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s35, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s35, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 3 ; MUBUF-NEXT: s_getpc_b64 s[34:35] ; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 @@ -48,10 +48,10 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] -; MUBUF-NEXT: v_readlane_b32 s35, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s35, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 4 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -70,11 +70,11 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s35, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 3 ; FLATSCR-NEXT: s_getpc_b64 s[34:35] ; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 @@ -82,10 +82,10 @@ define void 
@test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] -; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s35, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -110,20 +110,20 @@ define void @test_func_call_external_void_funcx2() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s35, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s35, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 3 ; MUBUF-NEXT: s_getpc_b64 s[34:35] ; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] -; MUBUF-NEXT: v_readlane_b32 s35, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s35, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 4 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -142,20 +142,20 @@ 
define void @test_func_call_external_void_funcx2() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s35, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 3 ; FLATSCR-NEXT: s_getpc_b64 s[34:35] ; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] -; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s35, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -181,8 +181,8 @@ define void @void_func_void_clobber_s30_s31() #2 { ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s31, v0, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v0, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v0, 1 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] @@ -200,8 +200,8 @@ define void @void_func_void_clobber_s30_s31() #2 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1 ; FLATSCR-NEXT: 
v_readlane_b32 s30, v0, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] @@ -523,23 +523,23 @@ define void @callee_saved_sgpr_func() #2 { ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 3 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 2 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: s_mov_b32 s34, s40 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] +; MUBUF-NEXT: v_readlane_b32 s30, v40, 1 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; use s34 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 3 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -559,23 +559,23 @@ define void @callee_saved_sgpr_func() #2 { ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 3 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 2 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, 
external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_mov_b32 s34, s40 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 1 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s34 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 3 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -626,13 +626,13 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v41, s4, 3 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v41, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v41, s31, 1 +; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: v_writelane_b32 v41, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v41, s30, 1 +; MUBUF-NEXT: v_writelane_b32 v41, s31, 2 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v41, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND @@ -648,9 +648,9 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; MUBUF-NEXT: ; use v40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: v_readlane_b32 s34, v41, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v41, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v41, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v41, 1 +; MUBUF-NEXT: v_readlane_b32 s31, v41, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v41, 0 ; 
MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v41, 3 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -670,13 +670,13 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v41, s0, 3 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v41, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v41, s31, 1 +; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: v_writelane_b32 v41, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v41, s30, 1 +; FLATSCR-NEXT: v_writelane_b32 v41, s31, 2 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v41, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND @@ -692,9 +692,9 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; FLATSCR-NEXT: ; use v40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: v_readlane_b32 s34, v41, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v41, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v41, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v41, 1 +; FLATSCR-NEXT: v_readlane_b32 s31, v41, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v41, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v41, 3 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index e7254eb5c3465..eb3ef69848a88 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -122,18 +122,18 @@ define void @callee_with_stack_and_call() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, 
s[18:19] ; MUBUF-NEXT: v_writelane_b32 v40, s16, 2 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -152,18 +152,18 @@ define void @callee_with_stack_and_call() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -194,15 +194,15 @@ define void @callee_no_stack_with_call() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 
; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[18:19] ; MUBUF-NEXT: v_writelane_b32 v40, s16, 2 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -221,15 +221,15 @@ define void @callee_no_stack_with_call() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -359,24 +359,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, 
s31, 1 -; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s38, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s39, 5 -; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6 -; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7 -; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9 -; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10 -; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s54, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s55, 13 -; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14 -; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15 -; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17 +; FLATSCR-NEXT: v_writelane_b32 v40, s36, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s37, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s38, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s39, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s48, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s49, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s50, 6 +; FLATSCR-NEXT: v_writelane_b32 v40, s51, 7 +; FLATSCR-NEXT: v_writelane_b32 v40, s52, 8 +; FLATSCR-NEXT: v_writelane_b32 v40, s53, 9 +; FLATSCR-NEXT: v_writelane_b32 v40, s54, 10 +; FLATSCR-NEXT: v_writelane_b32 v40, s55, 11 +; FLATSCR-NEXT: v_writelane_b32 v40, s64, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s65, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s66, 14 +; FLATSCR-NEXT: v_writelane_b32 v40, s67, 15 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 17 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART @@ -414,6 +414,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[16:31] ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 16 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[72:79] ; FLATSCR-NEXT: ;;#ASMEND @@ -423,24 +424,23 @@ define void @callee_func_sgpr_spill_no_calls(i32 
%in) #0 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[0:15] ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17 -; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16 -; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15 -; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s55, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s54, v40, 12 -; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11 -; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10 -; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9 -; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8 -; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7 -; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s39, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s38, v40, 4 -; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 17 +; FLATSCR-NEXT: v_readlane_b32 s67, v40, 15 +; FLATSCR-NEXT: v_readlane_b32 s66, v40, 14 +; FLATSCR-NEXT: v_readlane_b32 s65, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s64, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s55, v40, 11 +; FLATSCR-NEXT: v_readlane_b32 s54, v40, 10 +; FLATSCR-NEXT: v_readlane_b32 s53, v40, 9 +; FLATSCR-NEXT: v_readlane_b32 s52, v40, 8 +; FLATSCR-NEXT: v_readlane_b32 s51, v40, 7 +; FLATSCR-NEXT: v_readlane_b32 s50, v40, 6 +; FLATSCR-NEXT: v_readlane_b32 s49, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s48, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s39, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s38, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s37, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s36, v40, 0 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] @@ -489,15 +489,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, 
s32 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) @@ -508,15 +508,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s0, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -537,6 +537,8 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s48, 0 ; MUBUF-NEXT: v_writelane_b32 v1, s49, 1 ; MUBUF-NEXT: v_writelane_b32 v1, s50, 2 @@ -566,19 +568,17 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: v_writelane_b32 v1, s98, 26 ; MUBUF-NEXT: v_writelane_b32 v1, s99, 27 ; MUBUF-NEXT: 
v_writelane_b32 v1, s100, 28 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s101, 29 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 30 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 30 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_readlane_b32 s102, v1, 30 ; MUBUF-NEXT: v_readlane_b32 s101, v1, 29 ; MUBUF-NEXT: v_readlane_b32 s100, v1, 28 @@ -626,6 +626,8 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s48, 0 ; FLATSCR-NEXT: v_writelane_b32 v1, s49, 1 ; FLATSCR-NEXT: v_writelane_b32 v1, s50, 2 @@ -655,19 +657,17 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: v_writelane_b32 v1, s98, 26 ; FLATSCR-NEXT: v_writelane_b32 v1, s99, 27 ; FLATSCR-NEXT: v_writelane_b32 v1, s100, 28 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s101, 29 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 30 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 30 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword 
v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_readlane_b32 s102, v1, 30 ; FLATSCR-NEXT: v_readlane_b32 s101, v1, 29 ; FLATSCR-NEXT: v_readlane_b32 s100, v1, 28 @@ -731,6 +731,8 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v1, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v1, s49, 2 @@ -761,19 +763,17 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: v_writelane_b32 v1, s98, 27 ; MUBUF-NEXT: v_writelane_b32 v1, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v1, s100, 29 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s101, 30 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 31 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_readlane_b32 s102, v1, 31 ; MUBUF-NEXT: v_readlane_b32 s101, v1, 30 ; MUBUF-NEXT: v_readlane_b32 s100, v1, 29 @@ -822,6 +822,8 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0 ; 
FLATSCR-NEXT: v_writelane_b32 v1, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v1, s49, 2 @@ -852,19 +854,17 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: v_writelane_b32 v1, s98, 27 ; FLATSCR-NEXT: v_writelane_b32 v1, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v1, s100, 29 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s101, 30 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_readlane_b32 s102, v1, 31 ; FLATSCR-NEXT: v_readlane_b32 s101, v1, 30 ; FLATSCR-NEXT: v_readlane_b32 s100, v1, 29 @@ -970,15 +970,15 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v1, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -996,15 +996,15 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill ; 
FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload @@ -1036,18 +1036,18 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved initial VGPRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -1065,18 +1065,18 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 
0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved initial VGPRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload @@ -1116,20 +1116,20 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s6 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 ; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 +; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved VGPRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 @@ -1148,21 +1148,21 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v40, s2 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_addk_i32 s32, 0x100c +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: 
v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved VGPRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 @@ -1210,18 +1210,18 @@ define void @ipra_call_with_stack() #0 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[16:17] -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v1, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, local_empty_func@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, local_empty_func@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -1238,18 +1238,18 @@ define void @ipra_call_with_stack() #0 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0 
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, local_empty_func@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, local_empty_func@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload @@ -1319,6 +1319,7 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 @@ -1350,7 +1351,6 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 -; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1407,6 +1407,7 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 @@ -1438,7 +1439,6 @@ define 
void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 -; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1519,6 +1519,7 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 32 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v40, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v40, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v40, s49, 2 @@ -1550,7 +1551,6 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-NEXT: v_writelane_b32 v40, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v40, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v40, s101, 30 -; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v40, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1607,6 +1607,7 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v40, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v40, s49, 2 @@ -1638,7 +1639,6 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: v_writelane_b32 v40, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v40, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v40, s101, 30 -; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v40, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1718,6 +1718,7 @@ define void 
@spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 ; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 @@ -1749,10 +1750,9 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 -; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 -; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART @@ -1812,6 +1812,7 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 @@ -1841,12 +1842,11 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26 ; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27 ; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 -; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: scratch_store_dword off, v0, s1 ; FLATSCR-NEXT: s_waitcnt 
vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll index fcc032b51fe58..7abde5b74367d 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -421,17 +421,17 @@ define void @func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -454,17 +454,17 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; 
GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -487,17 +487,17 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -940,7 +940,9 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_writelane_b32 v40, s4, 2 +; GFX7-NEXT: v_writelane_b32 v40, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v40, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[4:5] ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -948,7 +950,6 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: flat_store_dword v[0:1], v0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, 0x140 -; GFX7-NEXT: v_writelane_b32 v40, s30, 0 ; GFX7-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 ; GFX7-NEXT: v_mov_b32_e32 v0, 10 ; GFX7-NEXT: v_mov_b32_e32 v1, 20 @@ -981,11 +982,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: v_mov_b32_e32 v28, 0x122 ; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c ; GFX7-NEXT: v_mov_b32_e32 v30, 0x136 -; GFX7-NEXT: v_writelane_b32 v40, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1004,7 +1004,9 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: v_writelane_b32 v40, s4, 2 +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_getpc_b64 s[4:5] ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -1012,7 +1014,6 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: global_store_dword v[0:1], v0, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, 0x140 -; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX90A-NEXT: v_mov_b32_e32 v0, 10 ; GFX90A-NEXT: v_mov_b32_e32 v1, 20 @@ -1045,11 +1046,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122 ; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c ; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136 -; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-NEXT: s_mov_b32 s32, s33 ; GFX90A-NEXT: v_readlane_b32 s4, v40, 2 ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1082,20 +1082,20 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1397,8 +1397,10 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 @@ -1408,7 +1410,6 @@ define void 
@func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 20 ; GCN-NEXT: v_mov_b32_e32 v2, 30 ; GCN-NEXT: v_mov_b32_e32 v3, 40 @@ -1439,14 +1440,13 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 2854bdca76d01..718140f82887e 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -266,17 +266,17 @@ define void @func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, 
s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -299,17 +299,17 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -332,17 +332,17 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: 
v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -652,7 +652,9 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -660,7 +662,6 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: flat_store_dword v[0:1], v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: v_mov_b32_e32 v1, 20 @@ -693,11 +694,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -730,20 +730,20 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 
0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -971,8 +971,10 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 @@ -982,7 +984,6 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 20 ; GCN-NEXT: v_mov_b32_e32 v2, 30 ; GCN-NEXT: v_mov_b32_e32 v3, 40 @@ -1013,14 +1014,13 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 
v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1461,16 +1461,16 @@ define void @func_call_no_workitem_id_hints() #2 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_hint@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_hint@rel32@hi+12 ; GCN-NEXT: v_mov_b32_e32 v0, 9 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 38c20c7cf62d6..9335cc304c294 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -33,15 +33,15 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 
0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -69,15 +69,15 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -105,15 +105,15 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 
; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -141,15 +141,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll index 676144e65c10f..c3c93c1b606ec 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-frame.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -212,118 +212,231 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX900-NEXT: .cfi_register 65, 72 ; GFX900-NEXT: s_mov_b32 s33, s32 ; GFX900-NEXT: .cfi_def_cfa_register 65 +; GFX900-NEXT: s_addk_i32 s32, 0x7100 ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416 ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 28160 ; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2602, 32, 17, 64, 27904 ; GFX900-NEXT: buffer_store_dword v43, off, 
s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 27648 ; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 27392 ; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 27136 ; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 26880 ; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2607, 32, 17, 64, 26624 ; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 26368 ; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 64, 26112 ; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 25856 ; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 25600 ; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 25344 ; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 25088 ; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2622, 32, 17, 64, 24832 ; GFX900-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 24576 ; GFX900-NEXT: buffer_store_dword v72, off, s[0:3], 
s33 offset:380 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 24320 ; GFX900-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 24064 ; GFX900-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 23808 ; GFX900-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 23552 ; GFX900-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 64, 23296 ; GFX900-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 23040 ; GFX900-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 64, 22784 ; GFX900-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 22528 ; GFX900-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 22272 ; GFX900-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 22016 ; GFX900-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 21760 ; GFX900-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 21504 ; GFX900-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 21248 ; GFX900-NEXT: buffer_store_dword v93, off, s[0:3], s33 
offset:328 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 20992 ; GFX900-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 20736 ; GFX900-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 20480 ; GFX900-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 ; GFX900-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 ; GFX900-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 ; GFX900-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 ; GFX900-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 ; GFX900-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 ; GFX900-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 ; GFX900-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 ; GFX900-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 ; GFX900-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 ; GFX900-NEXT: buffer_store_dword v122, off, s[0:3], s33 
offset:276 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 ; GFX900-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 ; GFX900-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 ; GFX900-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 ; GFX900-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 ; GFX900-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 ; GFX900-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 ; GFX900-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 ; GFX900-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 ; GFX900-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 ; GFX900-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 ; GFX900-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 ; GFX900-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 ; GFX900-NEXT: buffer_store_dword v143, off, s[0:3], 
s33 offset:224 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 ; GFX900-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 ; GFX900-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 ; GFX900-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 ; GFX900-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 ; GFX900-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 ; GFX900-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 ; GFX900-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 ; GFX900-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 ; GFX900-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 ; GFX900-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 ; GFX900-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 ; GFX900-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 ; GFX900-NEXT: buffer_store_dword v172, off, 
s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 ; GFX900-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 ; GFX900-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 ; GFX900-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 ; GFX900-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 ; GFX900-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 ; GFX900-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 ; GFX900-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 ; GFX900-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 ; GFX900-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 ; GFX900-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 ; GFX900-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 ; GFX900-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 ; GFX900-NEXT: buffer_store_dword v201, off, 
s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 ; GFX900-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 ; GFX900-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 ; GFX900-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 ; GFX900-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 ; GFX900-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 ; GFX900-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 ; GFX900-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 ; GFX900-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 ; GFX900-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 ; GFX900-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 ; GFX900-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 ; GFX900-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 ; GFX900-NEXT: buffer_store_dword v222, off, s[0:3], s33 
offset:68 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 ; GFX900-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 ; GFX900-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 ; GFX900-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 ; GFX900-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 ; GFX900-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 ; GFX900-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 ; GFX900-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 ; GFX900-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 ; GFX900-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 ; GFX900-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 ; GFX900-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 ; GFX900-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 ; GFX900-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte 
Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 ; GFX900-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 ; GFX900-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 ; GFX900-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 ; GFX900-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; clobber nonpreserved SGPRs ; GFX900-NEXT: ;;#ASMEND @@ -442,7 +555,6 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload ; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload ; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload -; GFX900-NEXT: s_addk_i32 s32, 0x7100 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: .cfi_def_cfa_register 64 ; GFX900-NEXT: s_mov_b32 s33, s40 @@ -630,118 +742,231 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-DIS-NEXT: .cfi_register 65, 72 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 ; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 28160 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2602, 
32, 17, 64, 27904 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 27648 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 27392 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 27136 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 26880 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2607, 32, 17, 64, 26624 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 26368 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 64, 26112 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 25856 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 25600 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 25344 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 25088 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: 
.cfi_llvm_vector_offset 2622, 32, 17, 64, 24832 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 24576 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 24320 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 24064 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 23808 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 23552 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 64, 23296 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 23040 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 64, 22784 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 22528 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 22272 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 22016 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded 
Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 21760 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 21504 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 21248 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 20992 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 20736 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 20480 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v110, off, 
s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 ; 
GFX90A-V2A-DIS-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: 
.cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 
4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v205, 
off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 ; GFX90A-V2A-DIS-NEXT: 
buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 ; 
GFX90A-V2A-DIS-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 ; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART ; GFX90A-V2A-DIS-NEXT: ; clobber nonpreserved SGPRs ; GFX90A-V2A-DIS-NEXT: ;;#ASMEND @@ -860,7 +1085,6 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-DIS-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload ; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload ; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload -; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 ; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 64 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s40 @@ -1080,118 +1304,231 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-EN-NEXT: .cfi_register 65, 72 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 ; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x5100 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2602, 3074, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2603, 3075, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: 
v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2604, 3076, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2605, 3077, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2606, 3078, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2607, 3079, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2616, 3080, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2617, 3081, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2618, 3082, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2619, 3083, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2620, 3084, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2621, 3085, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2622, 3086, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2623, 3087, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a16, v72 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2632, 3088, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a17, v73 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2633, 3089, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: 
v_accvgpr_write_b32 a18, v74 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2634, 3090, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a19, v75 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2635, 3091, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a20, v76 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2636, 3092, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a21, v77 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2637, 3093, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a22, v78 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2638, 3094, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a23, v79 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2639, 3095, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a24, v88 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2648, 3096, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a25, v89 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2649, 3097, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a26, v90 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2650, 3098, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a27, v91 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2651, 3099, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a28, v92 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2652, 3100, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a29, v93 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2653, 3101, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a30, v94 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2654, 3102, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a31, v95 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2655, 3103, 32, 17, 64 ; 
GFX90A-V2A-EN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2683, 32, 
17, 64, 17408 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: 
.cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; 
GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded 
Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded 
Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill 
+; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 ; GFX90A-V2A-EN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 ; GFX90A-V2A-EN-NEXT: ;;#ASMSTART ; GFX90A-V2A-EN-NEXT: ; clobber nonpreserved SGPRs ; GFX90A-V2A-EN-NEXT: ;;#ASMEND @@ -1278,7 +1615,6 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; GFX90A-V2A-EN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload ; GFX90A-V2A-EN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload ; GFX90A-V2A-EN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload -; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x5100 ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v95, a31 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: 
v_accvgpr_read_b32 v94, a30 ; Reload Reuse ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v93, a29 ; Reload Reuse @@ -1498,118 +1834,231 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; WAVE32-NEXT: .cfi_register 65, 72 ; WAVE32-NEXT: s_mov_b32 s33, s32 ; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: s_addk_i32 s32, 0x3880 ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 14208 ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 14080 ; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1578, 32, 1, 32, 13952 ; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1579, 32, 1, 32, 13824 ; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1580, 32, 1, 32, 13696 ; WAVE32-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1581, 32, 1, 32, 13568 ; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1582, 32, 1, 32, 13440 ; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1583, 32, 1, 32, 13312 ; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1592, 32, 1, 32, 13184 ; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1593, 32, 1, 32, 13056 ; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1594, 32, 1, 
32, 12928 ; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1595, 32, 1, 32, 12800 ; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1596, 32, 1, 32, 12672 ; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1597, 32, 1, 32, 12544 ; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1598, 32, 1, 32, 12416 ; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1599, 32, 1, 32, 12288 ; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1608, 32, 1, 32, 12160 ; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1609, 32, 1, 32, 12032 ; WAVE32-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1610, 32, 1, 32, 11904 ; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1611, 32, 1, 32, 11776 ; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1612, 32, 1, 32, 11648 ; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1613, 32, 1, 32, 11520 ; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1614, 32, 1, 32, 11392 ; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1615, 32, 1, 32, 11264 ; 
WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1624, 32, 1, 32, 11136 ; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1625, 32, 1, 32, 11008 ; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1626, 32, 1, 32, 10880 ; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1627, 32, 1, 32, 10752 ; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1628, 32, 1, 32, 10624 ; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1629, 32, 1, 32, 10496 ; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1630, 32, 1, 32, 10368 ; WAVE32-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1631, 32, 1, 32, 10240 ; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1640, 32, 1, 32, 10112 ; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1641, 32, 1, 32, 9984 ; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1642, 32, 1, 32, 9856 ; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1643, 32, 1, 32, 9728 ; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1644, 32, 1, 32, 9600 ; WAVE32-NEXT: 
buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1645, 32, 1, 32, 9472 ; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1646, 32, 1, 32, 9344 ; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1647, 32, 1, 32, 9216 ; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1656, 32, 1, 32, 9088 ; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1657, 32, 1, 32, 8960 ; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1658, 32, 1, 32, 8832 ; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1659, 32, 1, 32, 8704 ; WAVE32-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1660, 32, 1, 32, 8576 ; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1661, 32, 1, 32, 8448 ; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1662, 32, 1, 32, 8320 ; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1663, 32, 1, 32, 8192 ; WAVE32-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1672, 32, 1, 32, 8064 ; WAVE32-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1673, 32, 1, 32, 7936 ; WAVE32-NEXT: buffer_store_dword 
v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1674, 32, 1, 32, 7808 ; WAVE32-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1675, 32, 1, 32, 7680 ; WAVE32-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1676, 32, 1, 32, 7552 ; WAVE32-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1677, 32, 1, 32, 7424 ; WAVE32-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1678, 32, 1, 32, 7296 ; WAVE32-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1679, 32, 1, 32, 7168 ; WAVE32-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1688, 32, 1, 32, 7040 ; WAVE32-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1689, 32, 1, 32, 6912 ; WAVE32-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1690, 32, 1, 32, 6784 ; WAVE32-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1691, 32, 1, 32, 6656 ; WAVE32-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1692, 32, 1, 32, 6528 ; WAVE32-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1693, 32, 1, 32, 6400 ; WAVE32-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1694, 32, 1, 32, 6272 ; WAVE32-NEXT: buffer_store_dword v159, off, s[0:3], s33 
offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1695, 32, 1, 32, 6144 ; WAVE32-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1704, 32, 1, 32, 6016 ; WAVE32-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1705, 32, 1, 32, 5888 ; WAVE32-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1706, 32, 1, 32, 5760 ; WAVE32-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1707, 32, 1, 32, 5632 ; WAVE32-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1708, 32, 1, 32, 5504 ; WAVE32-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1709, 32, 1, 32, 5376 ; WAVE32-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1710, 32, 1, 32, 5248 ; WAVE32-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1711, 32, 1, 32, 5120 ; WAVE32-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1720, 32, 1, 32, 4992 ; WAVE32-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1721, 32, 1, 32, 4864 ; WAVE32-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1722, 32, 1, 32, 4736 ; WAVE32-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1723, 32, 1, 32, 4608 ; WAVE32-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte 
Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1724, 32, 1, 32, 4480 ; WAVE32-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1725, 32, 1, 32, 4352 ; WAVE32-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1726, 32, 1, 32, 4224 ; WAVE32-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1727, 32, 1, 32, 4096 ; WAVE32-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1736, 32, 1, 32, 3968 ; WAVE32-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1737, 32, 1, 32, 3840 ; WAVE32-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1738, 32, 1, 32, 3712 ; WAVE32-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1739, 32, 1, 32, 3584 ; WAVE32-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1740, 32, 1, 32, 3456 ; WAVE32-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1741, 32, 1, 32, 3328 ; WAVE32-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1742, 32, 1, 32, 3200 ; WAVE32-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1743, 32, 1, 32, 3072 ; WAVE32-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1752, 32, 1, 32, 2944 ; WAVE32-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; 
WAVE32-NEXT: .cfi_llvm_vector_offset 1753, 32, 1, 32, 2816 ; WAVE32-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1754, 32, 1, 32, 2688 ; WAVE32-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1755, 32, 1, 32, 2560 ; WAVE32-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1756, 32, 1, 32, 2432 ; WAVE32-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1757, 32, 1, 32, 2304 ; WAVE32-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1758, 32, 1, 32, 2176 ; WAVE32-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1759, 32, 1, 32, 2048 ; WAVE32-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1768, 32, 1, 32, 1920 ; WAVE32-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1769, 32, 1, 32, 1792 ; WAVE32-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1770, 32, 1, 32, 1664 ; WAVE32-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1771, 32, 1, 32, 1536 ; WAVE32-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1772, 32, 1, 32, 1408 ; WAVE32-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1773, 32, 1, 32, 1280 ; WAVE32-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 
1774, 32, 1, 32, 1152 ; WAVE32-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1775, 32, 1, 32, 1024 ; WAVE32-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1784, 32, 1, 32, 896 ; WAVE32-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1785, 32, 1, 32, 768 ; WAVE32-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1786, 32, 1, 32, 640 ; WAVE32-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1787, 32, 1, 32, 512 ; WAVE32-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1788, 32, 1, 32, 384 ; WAVE32-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1789, 32, 1, 32, 256 ; WAVE32-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1790, 32, 1, 32, 128 ; WAVE32-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1791, 32, 1, 32, 0 ; WAVE32-NEXT: ;;#ASMSTART ; WAVE32-NEXT: ; clobber nonpreserved SGPRs ; WAVE32-NEXT: ;;#ASMEND @@ -1730,7 +2179,6 @@ define void @callee_need_to_spill_fp_to_memory() #1 { ; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 -; WAVE32-NEXT: s_addk_i32 s32, 0x3880 ; WAVE32-NEXT: s_mov_b32 s32, s33 ; WAVE32-NEXT: .cfi_def_cfa_register 64 ; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 @@ -1998,15 +2446,16 @@ define hidden void @func_call_clobber() #0 { ; GFX900-NEXT: v_writelane_b32 v40, s16, 2 ; 
GFX900-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 ; GFX900-NEXT: .cfi_def_cfa_register 65 -; GFX900-NEXT: s_addk_i32 s32, 0x400 ; GFX900-NEXT: v_writelane_b32 v40, s30, 0 +; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v40, s31, 1 +; GFX900-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX900-NEXT: v_writelane_b32 v40, s31, 1 ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX900-NEXT: v_readlane_b32 s31, v40, 1 ; GFX900-NEXT: v_readlane_b32 s30, v40, 0 +; GFX900-NEXT: v_readlane_b32 s31, v40, 1 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: v_readlane_b32 s4, v40, 2 ; GFX900-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -2271,15 +2720,16 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s16, 2 ; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 ; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 -; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 ; GFX90A-V2A-DIS-NEXT: s_getpc_b64 s[16:17] ; GFX90A-V2A-DIS-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX90A-V2A-DIS-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-V2A-DIS-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 ; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s4, v40, 2 ; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -2544,15 +2994,16 @@ define hidden void @func_call_clobber() #0 { ; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s16, 2 ; 
GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 ; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 -; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 ; GFX90A-V2A-EN-NEXT: s_getpc_b64 s[16:17] ; GFX90A-V2A-EN-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX90A-V2A-EN-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-V2A-EN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX90A-V2A-EN-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-V2A-EN-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 ; GFX90A-V2A-EN-NEXT: v_readlane_b32 s4, v40, 2 ; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -2788,13 +3239,14 @@ define hidden void @func_call_clobber() #0 { ; WAVE32-NEXT: .cfi_def_cfa_register 65 ; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 ; WAVE32-NEXT: s_addk_i32 s32, 0x200 +; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 +; WAVE32-NEXT: .cfi_llvm_vector_registers 16, 1791, 0, 32, 1791, 1, 32 ; WAVE32-NEXT: s_getpc_b64 s[16:17] ; WAVE32-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; WAVE32-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 ; WAVE32-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 ; WAVE32-NEXT: v_readlane_b32 s30, v40, 0 +; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 ; WAVE32-NEXT: s_mov_b32 s32, s33 ; WAVE32-NEXT: v_readlane_b32 s4, v40, 2 ; WAVE32-NEXT: s_or_saveexec_b32 s5, -1 @@ -2819,7 +3271,9 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: 
.cfi_llvm_vector_offset 2600, 32, 17, 64, 256 ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; clobber ; GFX900-NEXT: ;;#ASMEND @@ -2845,9 +3299,13 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 768 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 512 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3104, 32, 17, 64, 256 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3105, 32, 17, 64, 0 ; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART ; GFX90A-V2A-DIS-NEXT: ; clobber ; GFX90A-V2A-DIS-NEXT: ;;#ASMEND @@ -2879,9 +3337,13 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3104, 2560, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3105, 2561, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: ;;#ASMSTART ; 
GFX90A-V2A-EN-NEXT: ; clobber ; GFX90A-V2A-EN-NEXT: ;;#ASMEND @@ -2908,7 +3370,9 @@ define hidden void @func_spill_vgpr_to_vmem() #0 { ; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 128 ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 0 ; WAVE32-NEXT: ;;#ASMSTART ; WAVE32-NEXT: ; clobber ; WAVE32-NEXT: ;;#ASMEND @@ -2943,7 +3407,9 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; clobber ; GFX900-NEXT: ;;#ASMEND @@ -2969,9 +3435,13 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 768 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 512 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3104, 32, 17, 64, 256 ; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3105, 
32, 17, 64, 0 ; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART ; GFX90A-V2A-DIS-NEXT: ; clobber ; GFX90A-V2A-DIS-NEXT: ;;#ASMEND @@ -3003,9 +3473,13 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 ; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3104, 2560, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3105, 2561, 32, 17, 64 ; GFX90A-V2A-EN-NEXT: ;;#ASMSTART ; GFX90A-V2A-EN-NEXT: ; clobber ; GFX90A-V2A-EN-NEXT: ;;#ASMEND @@ -3032,7 +3506,9 @@ define hidden void @func_spill_vgpr_to_agpr() #2 { ; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 128 ; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 0 ; WAVE32-NEXT: ;;#ASMSTART ; WAVE32-NEXT: ; clobber ; WAVE32-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index a0c25b2a0beb3..bc928041ed750 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -489,22 +489,40 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: v_writelane_b32 v41, s16, 16 ; CHECK-NEXT: .cfi_llvm_vector_registers 65, 2601, 16, 32 ; CHECK-NEXT: 
.cfi_def_cfa_register 65 -; CHECK-NEXT: v_writelane_b32 v41, s30, 0 -; CHECK-NEXT: v_writelane_b32 v41, s31, 1 -; CHECK-NEXT: v_writelane_b32 v41, s34, 2 -; CHECK-NEXT: v_writelane_b32 v41, s35, 3 -; CHECK-NEXT: v_writelane_b32 v41, s36, 4 -; CHECK-NEXT: v_writelane_b32 v41, s37, 5 -; CHECK-NEXT: v_writelane_b32 v41, s38, 6 -; CHECK-NEXT: v_writelane_b32 v41, s39, 7 -; CHECK-NEXT: v_writelane_b32 v41, s48, 8 -; CHECK-NEXT: v_writelane_b32 v41, s49, 9 -; CHECK-NEXT: v_writelane_b32 v41, s50, 10 -; CHECK-NEXT: v_writelane_b32 v41, s51, 11 -; CHECK-NEXT: v_writelane_b32 v41, s52, 12 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v41, s53, 13 -; CHECK-NEXT: v_writelane_b32 v41, s54, 14 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 0 +; CHECK-NEXT: v_writelane_b32 v41, s34, 0 +; CHECK-NEXT: .cfi_llvm_vector_registers 66, 2622, 0, 32 +; CHECK-NEXT: v_writelane_b32 v41, s35, 1 +; CHECK-NEXT: .cfi_llvm_vector_registers 67, 2622, 1, 32 +; CHECK-NEXT: v_writelane_b32 v41, s36, 2 +; CHECK-NEXT: .cfi_llvm_vector_registers 68, 2622, 2, 32 +; CHECK-NEXT: v_writelane_b32 v41, s37, 3 +; CHECK-NEXT: .cfi_llvm_vector_registers 69, 2622, 3, 32 +; CHECK-NEXT: v_writelane_b32 v41, s38, 4 +; CHECK-NEXT: .cfi_llvm_vector_registers 70, 2622, 4, 32 +; CHECK-NEXT: v_writelane_b32 v41, s39, 5 +; CHECK-NEXT: .cfi_llvm_vector_registers 71, 2622, 5, 32 +; CHECK-NEXT: v_writelane_b32 v41, s48, 6 +; CHECK-NEXT: .cfi_llvm_vector_registers 80, 2622, 6, 32 +; CHECK-NEXT: v_writelane_b32 v41, s49, 7 +; CHECK-NEXT: .cfi_llvm_vector_registers 81, 2622, 7, 32 +; CHECK-NEXT: v_writelane_b32 v41, s50, 8 +; CHECK-NEXT: .cfi_llvm_vector_registers 82, 2622, 8, 32 +; CHECK-NEXT: v_writelane_b32 v41, s51, 9 +; CHECK-NEXT: .cfi_llvm_vector_registers 83, 2622, 9, 32 +; CHECK-NEXT: v_writelane_b32 v41, s52, 10 +; CHECK-NEXT: .cfi_llvm_vector_registers 84, 2622, 10, 32 +; CHECK-NEXT: v_writelane_b32 v41, 
s53, 11 +; CHECK-NEXT: .cfi_llvm_vector_registers 85, 2622, 11, 32 +; CHECK-NEXT: v_writelane_b32 v41, s54, 12 +; CHECK-NEXT: .cfi_llvm_vector_registers 86, 2622, 12, 32 +; CHECK-NEXT: v_writelane_b32 v41, s55, 13 +; CHECK-NEXT: .cfi_llvm_vector_registers 87, 2622, 13, 32 +; CHECK-NEXT: v_writelane_b32 v41, s30, 14 +; CHECK-NEXT: v_writelane_b32 v41, s31, 15 +; CHECK-NEXT: .cfi_llvm_vector_registers 16, 2622, 14, 32, 2622, 15, 32 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef ; CHECK-NEXT: .Ltmp0: @@ -512,10 +530,8 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12 -; CHECK-NEXT: v_writelane_b32 v41, s55, 15 ; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -541,23 +557,23 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s30, v41, 14 ; CHECK-NEXT: flat_store_dword v[0:1], v2 -; CHECK-NEXT: v_readlane_b32 s55, v41, 15 -; CHECK-NEXT: v_readlane_b32 s54, v41, 14 -; CHECK-NEXT: v_readlane_b32 s53, v41, 13 -; CHECK-NEXT: v_readlane_b32 s52, v41, 12 -; CHECK-NEXT: v_readlane_b32 s51, v41, 11 -; CHECK-NEXT: v_readlane_b32 s50, v41, 10 -; CHECK-NEXT: v_readlane_b32 s49, v41, 9 -; CHECK-NEXT: v_readlane_b32 s48, v41, 8 -; CHECK-NEXT: v_readlane_b32 s39, v41, 7 -; CHECK-NEXT: v_readlane_b32 s38, v41, 6 -; CHECK-NEXT: v_readlane_b32 s37, v41, 5 -; CHECK-NEXT: v_readlane_b32 s36, v41, 4 -; CHECK-NEXT: v_readlane_b32 s35, v41, 3 -; CHECK-NEXT: v_readlane_b32 s34, 
v41, 2 -; CHECK-NEXT: v_readlane_b32 s31, v41, 1 -; CHECK-NEXT: v_readlane_b32 s30, v41, 0 +; CHECK-NEXT: v_readlane_b32 s31, v41, 15 +; CHECK-NEXT: v_readlane_b32 s55, v41, 13 +; CHECK-NEXT: v_readlane_b32 s54, v41, 12 +; CHECK-NEXT: v_readlane_b32 s53, v41, 11 +; CHECK-NEXT: v_readlane_b32 s52, v41, 10 +; CHECK-NEXT: v_readlane_b32 s51, v41, 9 +; CHECK-NEXT: v_readlane_b32 s50, v41, 8 +; CHECK-NEXT: v_readlane_b32 s49, v41, 7 +; CHECK-NEXT: v_readlane_b32 s48, v41, 6 +; CHECK-NEXT: v_readlane_b32 s39, v41, 5 +; CHECK-NEXT: v_readlane_b32 s38, v41, 4 +; CHECK-NEXT: v_readlane_b32 s37, v41, 3 +; CHECK-NEXT: v_readlane_b32 s36, v41, 2 +; CHECK-NEXT: v_readlane_b32 s35, v41, 1 +; CHECK-NEXT: v_readlane_b32 s34, v41, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v41, 16 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll index c05eef51c276f..a3863156b8d34 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll @@ -286,21 +286,20 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe ; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 +; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo -; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 -; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; 
CHECK-TRUE16-NEXT: s_wait_alu 0xfffe ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; CHECK-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-TRUE16-NEXT: s_mov_b32 s32, s33 ; CHECK-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -326,21 +325,20 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe ; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 +; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo -; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 -; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; CHECK-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-FAKE16-NEXT: s_mov_b32 s32, s33 ; CHECK-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir index d3a8d983bb22c..3cfb96fede71a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir @@ -1650,21 +1650,37 
@@ body: | ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: 
(store (s32) into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec 
:: (store (s32) into %stack.17, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -1762,21 +1778,37 @@ body: | ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into 
%stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -1889,21 +1921,37 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 
killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 0, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -2000,21 +2048,37 @@ body: | ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 1024 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 
32, $exec_lo, 32, 256 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 0 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -2109,21 +2173,37 @@ body: | ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1100-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 1024 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1100-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 256 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 0 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -2219,21 +2299,37 @@ body: | ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 1024 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 256 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 0 ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll index 76a2114a000cf..f5832e6f307fd 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll +++ 
b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll @@ -22,13 +22,14 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: v_writelane_b32 v42, s34, 3 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x3000 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v42, s30, 0 +; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v42, s30, 0 ; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:92 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:88 @@ -46,7 +47,6 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, v8 -; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -55,8 +55,8 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: v_readlane_b32 s31, v42, 1 ; GCN-NEXT: v_readlane_b32 s30, v42, 0 +; GCN-NEXT: v_readlane_b32 s31, v42, 1 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s4, v42, 2 ; GCN-NEXT: v_readlane_b32 s34, v42, 3 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index 
6d54bb544fb8c..5c14af9673d1e 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -501,21 +501,37 @@ body: | ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -604,21 +620,37 @@ body: | ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -722,21 +754,37 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, 
implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -868,21 +916,37 @@ body: | ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -979,21 +1043,37 @@ body: | ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; 
GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -1105,21 +1185,37 @@ body: | ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask 
$vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask 
$vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index 2e88da142bb41..2760c7a2187b4 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -16,18 +16,18 @@ define void @callee_with_stack_and_call() #0 { ; SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s4, 2 -; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0 +; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 +; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 ; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5] ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 ; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 +; 
SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 ; SPILL-TO-VGPR-NEXT: s_mov_b32 s32, s33 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 2 ; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -46,21 +46,14 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s30, 0 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; NO-SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -69,20 +62,12 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: 
buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v0, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v0, 0 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v0, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index 381b1741517b7..2a27263e16548 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -1728,20 +1728,20 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s19, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; 
GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1760,18 +1760,17 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s16 ; GFX11-NEXT: v_writelane_b32 v40, s3, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2133,21 +2132,23 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v2, s28 ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 ; GFX9-NEXT: v_mov_b32_e32 v5, s27 ; GFX9-NEXT: v_mov_b32_e32 v4, s26 ; GFX9-NEXT: 
v_mov_b32_e32 v3, s25 ; GFX9-NEXT: v_mov_b32_e32 v2, s24 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: v_mov_b32_e32 v5, s23 ; GFX9-NEXT: v_mov_b32_e32 v4, s22 ; GFX9-NEXT: v_mov_b32_e32 v3, s21 ; GFX9-NEXT: v_mov_b32_e32 v2, s20 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s17 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 ; GFX9-NEXT: s_getpc_b64 s[16:17] @@ -2159,8 +2160,8 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -2179,7 +2180,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s26 ; GFX11-NEXT: v_writelane_b32 v40, s25, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21 ; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19 ; GFX11-NEXT: s_getpc_b64 s[20:21] @@ -2188,11 +2191,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17 ; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3 ; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23 ; GFX11-NEXT: 
v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1 ; GFX11-NEXT: v_mov_b32_e32 v10, s0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32 @@ -2200,8 +2201,8 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index 9d137fb4101e4..031f25bec26fe 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -13,6 +13,7 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: s_or_saveexec_b64 s[34:35], -1 ; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; SDAG-NEXT: s_mov_b64 exec, s[34:35] +; SDAG-NEXT: s_addk_i32 s32, 0x400 ; SDAG-NEXT: v_writelane_b32 v40, s4, 0 ; SDAG-NEXT: v_writelane_b32 v40, s5, 1 ; SDAG-NEXT: v_writelane_b32 v40, s6, 2 @@ -39,47 +40,46 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_writelane_b32 v40, s27, 23 ; SDAG-NEXT: v_writelane_b32 v40, s28, 24 ; SDAG-NEXT: v_writelane_b32 v40, s29, 25 -; SDAG-NEXT: v_writelane_b32 v40, s30, 26 -; SDAG-NEXT: v_writelane_b32 v40, s31, 27 -; SDAG-NEXT: v_writelane_b32 v40, s72, 28 -; SDAG-NEXT: v_writelane_b32 v40, s73, 29 -; SDAG-NEXT: v_writelane_b32 v40, s74, 30 -; SDAG-NEXT: v_writelane_b32 v40, s75, 31 -; SDAG-NEXT: v_writelane_b32 v40, s76, 32 -; SDAG-NEXT: v_writelane_b32 v40, s77, 33 -; SDAG-NEXT: v_writelane_b32 v40, s78, 34 -; SDAG-NEXT: v_writelane_b32 v40, s79, 35 
-; SDAG-NEXT: v_writelane_b32 v40, s88, 36 -; SDAG-NEXT: v_writelane_b32 v40, s89, 37 -; SDAG-NEXT: v_writelane_b32 v40, s90, 38 -; SDAG-NEXT: v_writelane_b32 v40, s91, 39 -; SDAG-NEXT: v_writelane_b32 v40, s92, 40 -; SDAG-NEXT: v_writelane_b32 v40, s93, 41 -; SDAG-NEXT: v_writelane_b32 v40, s94, 42 +; SDAG-NEXT: v_writelane_b32 v40, s72, 26 +; SDAG-NEXT: v_writelane_b32 v40, s73, 27 +; SDAG-NEXT: v_writelane_b32 v40, s74, 28 +; SDAG-NEXT: v_writelane_b32 v40, s75, 29 +; SDAG-NEXT: v_writelane_b32 v40, s76, 30 +; SDAG-NEXT: v_writelane_b32 v40, s77, 31 +; SDAG-NEXT: v_writelane_b32 v40, s78, 32 +; SDAG-NEXT: v_writelane_b32 v40, s79, 33 +; SDAG-NEXT: v_writelane_b32 v40, s88, 34 +; SDAG-NEXT: v_writelane_b32 v40, s89, 35 +; SDAG-NEXT: v_writelane_b32 v40, s90, 36 +; SDAG-NEXT: v_writelane_b32 v40, s91, 37 +; SDAG-NEXT: v_writelane_b32 v40, s92, 38 +; SDAG-NEXT: v_writelane_b32 v40, s93, 39 +; SDAG-NEXT: v_writelane_b32 v40, s94, 40 +; SDAG-NEXT: v_writelane_b32 v40, s95, 41 +; SDAG-NEXT: v_writelane_b32 v40, s30, 42 +; SDAG-NEXT: v_writelane_b32 v40, s31, 43 ; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; SDAG-NEXT: s_mov_b64 s[8:9], 0 -; SDAG-NEXT: s_addk_i32 s32, 0x400 -; SDAG-NEXT: v_writelane_b32 v40, s95, 43 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35] -; SDAG-NEXT: v_readlane_b32 s95, v40, 43 -; SDAG-NEXT: v_readlane_b32 s94, v40, 42 -; SDAG-NEXT: v_readlane_b32 s93, v40, 41 -; SDAG-NEXT: v_readlane_b32 s92, v40, 40 -; SDAG-NEXT: v_readlane_b32 s91, v40, 39 -; SDAG-NEXT: v_readlane_b32 s90, v40, 38 -; SDAG-NEXT: v_readlane_b32 s89, v40, 37 -; SDAG-NEXT: v_readlane_b32 s88, v40, 36 -; SDAG-NEXT: v_readlane_b32 s79, v40, 35 -; SDAG-NEXT: v_readlane_b32 s78, v40, 34 -; SDAG-NEXT: v_readlane_b32 s77, v40, 33 -; SDAG-NEXT: v_readlane_b32 s76, v40, 32 -; SDAG-NEXT: v_readlane_b32 s75, v40, 31 -; SDAG-NEXT: v_readlane_b32 s74, v40, 30 -; SDAG-NEXT: v_readlane_b32 s73, v40, 29 -; SDAG-NEXT: v_readlane_b32 s72, 
v40, 28 -; SDAG-NEXT: v_readlane_b32 s31, v40, 27 -; SDAG-NEXT: v_readlane_b32 s30, v40, 26 +; SDAG-NEXT: v_readlane_b32 s30, v40, 42 +; SDAG-NEXT: v_readlane_b32 s31, v40, 43 +; SDAG-NEXT: v_readlane_b32 s95, v40, 41 +; SDAG-NEXT: v_readlane_b32 s94, v40, 40 +; SDAG-NEXT: v_readlane_b32 s93, v40, 39 +; SDAG-NEXT: v_readlane_b32 s92, v40, 38 +; SDAG-NEXT: v_readlane_b32 s91, v40, 37 +; SDAG-NEXT: v_readlane_b32 s90, v40, 36 +; SDAG-NEXT: v_readlane_b32 s89, v40, 35 +; SDAG-NEXT: v_readlane_b32 s88, v40, 34 +; SDAG-NEXT: v_readlane_b32 s79, v40, 33 +; SDAG-NEXT: v_readlane_b32 s78, v40, 32 +; SDAG-NEXT: v_readlane_b32 s77, v40, 31 +; SDAG-NEXT: v_readlane_b32 s76, v40, 30 +; SDAG-NEXT: v_readlane_b32 s75, v40, 29 +; SDAG-NEXT: v_readlane_b32 s74, v40, 28 +; SDAG-NEXT: v_readlane_b32 s73, v40, 27 +; SDAG-NEXT: v_readlane_b32 s72, v40, 26 ; SDAG-NEXT: v_readlane_b32 s29, v40, 25 ; SDAG-NEXT: v_readlane_b32 s28, v40, 24 ; SDAG-NEXT: v_readlane_b32 s27, v40, 23 @@ -122,6 +122,7 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[34:35] +; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL-NEXT: v_writelane_b32 v40, s6, 2 @@ -148,47 +149,46 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_writelane_b32 v40, s27, 23 ; GISEL-NEXT: v_writelane_b32 v40, s28, 24 ; GISEL-NEXT: v_writelane_b32 v40, s29, 25 -; GISEL-NEXT: v_writelane_b32 v40, s30, 26 -; GISEL-NEXT: v_writelane_b32 v40, s31, 27 -; GISEL-NEXT: v_writelane_b32 v40, s72, 28 -; GISEL-NEXT: v_writelane_b32 v40, s73, 29 -; GISEL-NEXT: v_writelane_b32 v40, s74, 30 -; GISEL-NEXT: v_writelane_b32 v40, s75, 31 -; GISEL-NEXT: v_writelane_b32 v40, s76, 32 -; GISEL-NEXT: v_writelane_b32 v40, s77, 33 -; GISEL-NEXT: v_writelane_b32 v40, s78, 34 -; GISEL-NEXT: v_writelane_b32 v40, s79, 35 -; GISEL-NEXT: 
v_writelane_b32 v40, s88, 36 -; GISEL-NEXT: v_writelane_b32 v40, s89, 37 -; GISEL-NEXT: v_writelane_b32 v40, s90, 38 -; GISEL-NEXT: v_writelane_b32 v40, s91, 39 -; GISEL-NEXT: v_writelane_b32 v40, s92, 40 -; GISEL-NEXT: v_writelane_b32 v40, s93, 41 -; GISEL-NEXT: v_writelane_b32 v40, s94, 42 +; GISEL-NEXT: v_writelane_b32 v40, s72, 26 +; GISEL-NEXT: v_writelane_b32 v40, s73, 27 +; GISEL-NEXT: v_writelane_b32 v40, s74, 28 +; GISEL-NEXT: v_writelane_b32 v40, s75, 29 +; GISEL-NEXT: v_writelane_b32 v40, s76, 30 +; GISEL-NEXT: v_writelane_b32 v40, s77, 31 +; GISEL-NEXT: v_writelane_b32 v40, s78, 32 +; GISEL-NEXT: v_writelane_b32 v40, s79, 33 +; GISEL-NEXT: v_writelane_b32 v40, s88, 34 +; GISEL-NEXT: v_writelane_b32 v40, s89, 35 +; GISEL-NEXT: v_writelane_b32 v40, s90, 36 +; GISEL-NEXT: v_writelane_b32 v40, s91, 37 +; GISEL-NEXT: v_writelane_b32 v40, s92, 38 +; GISEL-NEXT: v_writelane_b32 v40, s93, 39 +; GISEL-NEXT: v_writelane_b32 v40, s94, 40 +; GISEL-NEXT: v_writelane_b32 v40, s95, 41 +; GISEL-NEXT: v_writelane_b32 v40, s30, 42 +; GISEL-NEXT: v_writelane_b32 v40, s31, 43 ; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; GISEL-NEXT: s_mov_b64 s[8:9], 0 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s95, 43 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GISEL-NEXT: v_readlane_b32 s95, v40, 43 -; GISEL-NEXT: v_readlane_b32 s94, v40, 42 -; GISEL-NEXT: v_readlane_b32 s93, v40, 41 -; GISEL-NEXT: v_readlane_b32 s92, v40, 40 -; GISEL-NEXT: v_readlane_b32 s91, v40, 39 -; GISEL-NEXT: v_readlane_b32 s90, v40, 38 -; GISEL-NEXT: v_readlane_b32 s89, v40, 37 -; GISEL-NEXT: v_readlane_b32 s88, v40, 36 -; GISEL-NEXT: v_readlane_b32 s79, v40, 35 -; GISEL-NEXT: v_readlane_b32 s78, v40, 34 -; GISEL-NEXT: v_readlane_b32 s77, v40, 33 -; GISEL-NEXT: v_readlane_b32 s76, v40, 32 -; GISEL-NEXT: v_readlane_b32 s75, v40, 31 -; GISEL-NEXT: v_readlane_b32 s74, v40, 30 -; GISEL-NEXT: v_readlane_b32 s73, v40, 29 -; 
GISEL-NEXT: v_readlane_b32 s72, v40, 28 -; GISEL-NEXT: v_readlane_b32 s31, v40, 27 -; GISEL-NEXT: v_readlane_b32 s30, v40, 26 +; GISEL-NEXT: v_readlane_b32 s30, v40, 42 +; GISEL-NEXT: v_readlane_b32 s31, v40, 43 +; GISEL-NEXT: v_readlane_b32 s95, v40, 41 +; GISEL-NEXT: v_readlane_b32 s94, v40, 40 +; GISEL-NEXT: v_readlane_b32 s93, v40, 39 +; GISEL-NEXT: v_readlane_b32 s92, v40, 38 +; GISEL-NEXT: v_readlane_b32 s91, v40, 37 +; GISEL-NEXT: v_readlane_b32 s90, v40, 36 +; GISEL-NEXT: v_readlane_b32 s89, v40, 35 +; GISEL-NEXT: v_readlane_b32 s88, v40, 34 +; GISEL-NEXT: v_readlane_b32 s79, v40, 33 +; GISEL-NEXT: v_readlane_b32 s78, v40, 32 +; GISEL-NEXT: v_readlane_b32 s77, v40, 31 +; GISEL-NEXT: v_readlane_b32 s76, v40, 30 +; GISEL-NEXT: v_readlane_b32 s75, v40, 29 +; GISEL-NEXT: v_readlane_b32 s74, v40, 28 +; GISEL-NEXT: v_readlane_b32 s73, v40, 27 +; GISEL-NEXT: v_readlane_b32 s72, v40, 26 ; GISEL-NEXT: v_readlane_b32 s29, v40, 25 ; GISEL-NEXT: v_readlane_b32 s28, v40, 24 ; GISEL-NEXT: v_readlane_b32 s27, v40, 23 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 3ca36a97981f2..5ada43298deb6 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -133,16 +133,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], 
s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -163,15 +163,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -192,15 +192,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -221,15 +221,15 @@ define amdgpu_gfx void 
@test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -253,18 +253,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: 
s_or_saveexec_b64 s[36:37], -1 @@ -284,18 +284,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -315,18 +315,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -346,18 +346,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -382,18 +382,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_i1_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -413,18 +413,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -444,18 +444,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 
+; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -475,18 +475,18 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -512,14 +512,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; 
GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -540,14 +540,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -568,15 +568,15 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, 
external_void_func_i8@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -596,15 +596,15 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -625,14 +625,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -656,16 +656,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -685,16 +685,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -714,17 +714,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_i8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -743,17 +743,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_i8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -773,16 +773,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 
s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -807,16 +807,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -836,16 +836,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -865,17 +865,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -894,17 +894,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; 
GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -924,16 +924,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -959,14 +959,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -987,14 +987,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1015,15 +1015,15 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1043,15 +1043,15 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1072,14 +1072,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, 
s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1103,16 +1103,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1132,16 +1132,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_i16_signext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1161,17 +1161,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1190,17 +1190,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: 
s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1220,16 +1220,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1254,16 +1254,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1283,16 +1283,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 @@ -1312,17 +1312,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1341,17 +1341,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; 
GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1371,16 +1371,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1406,14 +1406,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1434,14 +1434,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1462,15 +1462,15 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1491,14 +1491,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1523,15 +1523,15 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 
s[36:37], -1 @@ -1552,15 +1552,15 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1581,15 +1581,15 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1610,15 +1610,15 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1642,17 +1642,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1672,17 +1672,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: 
v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1702,18 +1702,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1733,17 +1732,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1769,17 +1768,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1800,17 +1799,17 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1831,16 +1830,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1861,17 +1860,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 
exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1895,19 +1894,19 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: 
s_or_saveexec_b64 s[36:37], -1 @@ -1927,19 +1926,19 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1959,18 +1958,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1990,19 +1988,19 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2029,21 +2027,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: 
global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2063,21 +2061,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: v_mov_b32_e32 v6, 3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2097,19 +2095,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11-NEXT: scratch_store_b32 
off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2129,21 +2127,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3 -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v4i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2170,14 +2168,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2198,14 +2196,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2226,15 +2224,15 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -2254,15 +2252,15 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -2283,14 +2281,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2315,14 +2313,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 
s[36:37], -1 @@ -2343,14 +2341,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2371,15 +2369,15 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2400,14 +2398,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: 
s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2432,15 +2430,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2461,15 +2459,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v2f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2490,15 +2488,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2519,15 +2517,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2552,16 +2550,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2582,16 +2580,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2612,16 +2610,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2642,16 +2640,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2676,6 +2674,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 @@ -2683,11 +2683,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2708,6 +2706,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2715,11 +2715,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2740,17 +2738,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2771,6 +2769,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2778,11 +2778,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2807,15 +2805,15 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2836,15 +2834,15 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 
s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2865,15 +2863,15 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2894,15 +2892,15 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -2927,17 +2925,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2958,17 +2956,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2989,16 +2987,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3019,17 +3017,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3054,6 +3052,8 @@ 
define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -3062,11 +3062,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3087,6 +3085,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 @@ -3095,11 +3095,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 @@ -3120,17 +3118,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3151,6 +3149,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 @@ -3159,11 +3159,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3187,20 +3185,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3220,20 +3218,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v2i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3253,21 +3251,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -3286,21 +3284,21 @@ define 
amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -3320,20 +3318,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 
16 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3358,20 +3356,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3391,20 +3389,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3424,20 +3422,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ 
-3457,20 +3455,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3495,21 +3493,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3529,21 +3527,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3563,21 +3561,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 
0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3597,21 +3595,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3636,14 +3634,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3651,8 +3649,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, v6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3672,14 +3670,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[5:6], 
v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3687,8 +3685,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v4, v6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3708,14 +3706,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3723,8 +3721,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v4, v6 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3744,14 +3742,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3759,8 +3757,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3785,14 +3783,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, 
external_void_func_v8i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3803,8 +3801,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, v8 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3824,14 +3822,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3842,8 +3840,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v8 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3863,14 +3861,14 @@ define amdgpu_gfx void 
@test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3880,8 +3878,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3901,14 +3899,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: 
global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3919,8 +3917,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3945,17 +3943,17 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi +; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -3995,8 +3993,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v18, v33 ; GFX9-NEXT: v_mov_b32_e32 v19, v34 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4016,18 +4014,18 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4067,8 +4065,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v18, v33 ; GFX10-NEXT: v_mov_b32_e32 v19, v34 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4088,16 +4086,17 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, 
s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4134,8 +4133,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33 ; GFX11-NEXT: v_mov_b32_e32 v19, v34 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4155,18 +4154,18 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; 
GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4206,8 +4205,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4233,23 +4232,23 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ubyte v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_byte v[40:41], v0, off ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; 
GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4269,23 +4268,23 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_byte v[40:41], v0, off ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4305,24 +4304,25 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: global_store_b8 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4341,24 +4341,25 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, 
external_void_func_i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: global_store_b8 v[40:41], v0, off ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4378,23 +4379,23 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_byte v[40:41], v0, off ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, 
v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4421,17 +4422,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ushort v0, v[40:41], off ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -4441,8 +4442,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4462,23 +4463,23 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo ; GFX10-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4503,17 +4504,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: 
v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -4521,8 +4523,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 @@ -4547,17 +4549,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, 
s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -4565,8 +4568,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 @@ -4591,23 +4594,23 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4639,16 +4642,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4662,8 +4665,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4683,16 +4686,16 
@@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4700,8 +4703,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_mov_b32_e32 v3, 2 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 @@ -4727,35 +4730,36 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: 
v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4) ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_store_b8 v[3:4], v2, off ; GFX11-TRUE16-NEXT: global_store_b16 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4774,17 +4778,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 
+; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4794,8 +4799,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 @@ -4822,16 +4827,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4839,8 +4844,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 @@ -4873,16 +4878,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: 
v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4897,8 +4902,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4918,16 +4923,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4935,8 +4940,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: 
s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4963,17 +4968,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4984,11 +4990,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: 
v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: global_store_b32 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 @@ -5011,17 +5017,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5032,8 +5039,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5064,16 +5071,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5081,8 +5088,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5116,16 +5123,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, 
s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5145,8 +5152,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5166,16 +5173,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: 
global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5185,8 +5192,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5216,17 +5223,18 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5241,7 +5249,7 @@ define 
amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v0.h -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_store_b8 v[2:3], v4, off @@ -5250,7 +5258,6 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -5269,17 +5276,18 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; 
GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5292,8 +5300,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5327,16 +5335,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5346,8 +5354,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5384,16 +5392,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5418,8 +5426,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5439,16 +5447,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5463,12 +5471,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 +; GFX10-NEXT: v_readlane_b32 s30, v42, 0 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 @@ -5494,17 +5502,18 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5526,14 +5535,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v1.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v3.h, v2.h ; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v2.l, v3.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: global_store_b64 v[40:41], v[3:4], off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -5552,17 +5560,18 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5584,12 +5593,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v6, v7 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 +; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5617,16 +5626,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5641,12 +5650,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 @@ -5679,21 +5688,21 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v44, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v42, 16 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: v_mov_b32_e32 v43, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX9-NEXT: v_writelane_b32 v44, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -5780,8 +5789,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: v_readlane_b32 s30, v44, 0 +; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v44, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5801,22 +5810,22 @@ define 
amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v44, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -5903,8 +5912,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v44, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5924,21 +5933,25 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; 
GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi -; GFX11-TRUE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo +; GFX11-TRUE16-NEXT: s_clause 0x1 +; GFX11-TRUE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-TRUE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6031,8 +6044,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v44, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -6051,21 
+6064,25 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi -; GFX11-FAKE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo +; GFX11-FAKE16-NEXT: s_clause 0x1 +; GFX11-FAKE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-FAKE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6182,8 +6199,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-FAKE16-NEXT: 
v_readlane_b32 s31, v44, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v44, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -6203,22 +6220,22 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:12 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v43, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6305,8 +6322,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 offset:4 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v44, 1 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v44, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6334,15 +6351,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6362,15 +6379,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6390,16 +6407,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6419,15 +6436,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX10-SCRATCH-NEXT: 
s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6452,15 +6469,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6480,15 +6497,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v3i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6508,16 +6525,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6537,15 +6554,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6570,15 +6587,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6598,15 +6615,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v3f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6626,16 +6643,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6655,15 +6672,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6689,15 +6706,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6718,15 +6735,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: 
v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6747,15 +6764,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6776,15 +6793,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v3i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6809,15 +6826,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6838,15 +6855,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6867,16 +6884,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6897,15 +6914,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6929,15 +6946,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6957,15 +6974,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6985,16 +7002,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7014,15 +7031,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7048,15 +7065,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7077,15 +7094,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: 
s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7106,16 +7123,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7136,15 +7153,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7168,15 +7185,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7196,15 +7213,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; 
GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7224,16 +7241,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7253,15 +7270,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7286,15 +7303,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7314,15 +7331,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7342,16 +7359,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7371,15 +7388,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v2i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7405,15 +7422,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7434,15 +7451,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7463,15 +7480,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7492,15 +7509,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7525,16 +7542,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7555,16 +7572,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7585,16 +7602,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_mov_b32_e32 v2, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7615,16 +7632,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7649,17 +7666,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; 
GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7680,17 +7697,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7711,16 +7728,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7741,17 +7758,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7775,15 +7792,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; 
GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7803,15 +7820,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7831,16 +7848,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7860,15 +7877,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7894,17 +7911,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7925,17 +7942,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7956,16 +7973,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7986,17 +8003,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8021,6 +8038,8 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 @@ -8028,11 +8047,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8053,6 +8070,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 @@ -8060,11 +8079,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8085,17 +8102,17 @@ define 
amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_mov_b32_e32 v4, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8116,6 +8133,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 @@ -8123,11 +8142,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8151,19 +8168,19 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8183,10 +8200,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] @@ -8194,10 +8212,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8217,21 +8234,21 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1] ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8251,10 +8268,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] @@ -8262,10 +8280,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8292,6 +8309,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 @@ -8302,11 +8321,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v5, 6 ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8327,6 +8344,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 @@ -8337,11 +8356,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8362,18 +8379,18 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8394,6 +8411,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 @@ -8404,11 +8423,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8432,11 +8449,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 @@ -8445,8 +8462,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8466,10 +8483,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x3 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] @@ -8479,10 +8497,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8502,10 +8519,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: 
s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v12, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v12, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_load_b128 v[0:3], v12, s[0:1] @@ -8514,11 +8532,10 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8538,10 +8555,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x3 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] @@ -8551,10 +8569,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8580,11 +8597,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8598,8 +8615,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8619,10 +8636,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], 
s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] @@ -8636,10 +8654,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8659,10 +8676,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v28, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v28, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1] @@ -8675,11 +8693,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8699,10 +8716,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x7 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] @@ -8716,10 +8734,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8745,11 +8762,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: 
s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: global_load_dword v32, v[0:1], off -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8762,12 +8780,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8787,10 +8804,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 @@ -8805,12 +8823,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 
s34, external_void_func_v32i32_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8830,10 +8847,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v28, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v28, 0 ; GFX11-NEXT: global_load_b32 v32, v[0:1], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 @@ -8847,12 +8865,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8872,10 +8889,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x7 @@ -8890,12 +8908,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8922,23 +8939,23 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, v0 ; GFX9-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, 
external_i32_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[40:41], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8958,15 +8975,15 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[40:41], v0, off @@ -8974,8 +8991,8 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; 
GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8995,24 +9012,26 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; GFX11-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v41, v1 :: v_dual_mov_b32 v40, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v42, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: global_store_b32 v[40:41], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9032,15 +9051,15 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, 
s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_dword v[40:41], v0, off @@ -9048,8 +9067,8 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9074,19 +9093,19 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9106,10 +9125,11 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] @@ -9117,10 +9137,9 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9140,21 +9159,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-TRUE16-NEXT: 
v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9173,21 +9192,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, s[0:1] ; GFX11-FAKE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9207,10 +9226,11 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1] @@ -9218,10 +9238,9 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9247,19 +9266,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9279,19 +9298,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 @@ -9311,21 +9330,20 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s33 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9344,20 +9362,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: 
s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s33 -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9377,19 +9394,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 
s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9418,23 +9435,23 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -9459,24 +9476,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -9501,24 +9518,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: s_add_i32 s2, s33, 8 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 
external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s33 offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s33 offset:12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -9542,23 +9559,23 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: s_add_i32 s2, s33, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; 
GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_load_u8 v0, off, s33 offset:8 ; GFX11-FAKE16-NEXT: scratch_load_b32 v1, off, s33 offset:12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) @@ -9583,24 +9600,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: 
scratch_load_dword v1, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -9642,11 +9659,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi @@ -9671,8 +9688,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9692,11 +9709,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -9722,8 +9739,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9743,11 +9760,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1] ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi @@ -9770,8 +9787,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v1, v16 ; GFX11-NEXT: v_dual_mov_b32 v2, v17 :: v_dual_mov_b32 v3, v18 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9791,11 +9808,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 @@ -9821,8 +9838,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9847,49 +9864,49 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s30, 14 +; GFX9-NEXT: v_writelane_b32 v40, s31, 15 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:16 ; GFX9-NEXT: 
buffer_load_dword v33, off, s[0:3], s33 offset:20 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 ; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 14 +; GFX9-NEXT: v_readlane_b32 s31, v40, 15 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; 
GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload @@ -9907,50 +9924,50 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-NEXT: v_writelane_b32 v40, s48, 6 +; GFX10-NEXT: v_writelane_b32 v40, s49, 7 +; GFX10-NEXT: v_writelane_b32 v40, s50, 8 +; GFX10-NEXT: v_writelane_b32 v40, s51, 9 +; GFX10-NEXT: v_writelane_b32 v40, s52, 10 +; GFX10-NEXT: v_writelane_b32 v40, s53, 11 +; GFX10-NEXT: v_writelane_b32 v40, s54, 12 +; GFX10-NEXT: v_writelane_b32 v40, s55, 13 +; GFX10-NEXT: v_writelane_b32 v40, s30, 14 +; GFX10-NEXT: v_writelane_b32 v40, s31, 15 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:16 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:20 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; 
GFX10-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-NEXT: v_writelane_b32 v40, s48, 8 -; GFX10-NEXT: v_writelane_b32 v40, s49, 9 -; GFX10-NEXT: v_writelane_b32 v40, s50, 10 -; GFX10-NEXT: v_writelane_b32 v40, s51, 11 -; GFX10-NEXT: v_writelane_b32 v40, s52, 12 -; GFX10-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-NEXT: v_writelane_b32 v40, s54, 14 -; GFX10-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s55, v40, 15 -; GFX10-NEXT: v_readlane_b32 s54, v40, 14 -; GFX10-NEXT: v_readlane_b32 s53, v40, 13 -; GFX10-NEXT: v_readlane_b32 s52, v40, 12 -; GFX10-NEXT: v_readlane_b32 s51, v40, 11 -; GFX10-NEXT: v_readlane_b32 s50, v40, 10 -; GFX10-NEXT: v_readlane_b32 s49, v40, 9 -; GFX10-NEXT: v_readlane_b32 s48, v40, 8 -; GFX10-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 14 +; GFX10-NEXT: v_readlane_b32 s31, v40, 15 +; GFX10-NEXT: v_readlane_b32 s55, v40, 13 +; GFX10-NEXT: v_readlane_b32 s54, v40, 12 +; GFX10-NEXT: v_readlane_b32 s53, v40, 11 +; GFX10-NEXT: v_readlane_b32 s52, v40, 10 +; GFX10-NEXT: v_readlane_b32 s51, v40, 9 +; GFX10-NEXT: v_readlane_b32 s50, v40, 8 +; GFX10-NEXT: v_readlane_b32 s49, v40, 7 
+; GFX10-NEXT: v_readlane_b32 s48, v40, 6 +; GFX10-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload @@ -9968,47 +9985,47 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:24 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, 32 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-NEXT: v_writelane_b32 v40, s52, 10 +; GFX11-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-NEXT: v_writelane_b32 v40, s54, 12 +; GFX11-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-NEXT: v_writelane_b32 v40, s30, 14 +; GFX11-NEXT: v_writelane_b32 v40, s31, 15 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 offset:16 ; GFX11-NEXT: scratch_load_b32 v31, off, s33 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: 
v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 -; GFX11-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-NEXT: v_writelane_b32 v40, s55, 15 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s30, v40, 14 +; GFX11-NEXT: v_readlane_b32 s31, v40, 15 +; GFX11-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: 
scratch_load_b32 v40, off, s33 offset:24 ; 4-byte Folded Reload @@ -10026,47 +10043,47 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 15 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s48, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:24 ; 4-byte Folded Reload @@ -10092,16 +10109,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -10122,15 +10139,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10151,15 +10168,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10180,15 +10197,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10212,16 +10229,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10242,16 +10259,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX10-NEXT: s_movk_i32 s4, 0x7b -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -10272,17 +10289,16 @@ define amdgpu_gfx void 
@test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX11-NEXT: s_movk_i32 s4, 0x7b -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10303,16 +10319,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, 
v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10337,16 +10353,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10367,16 +10383,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX10-NEXT: s_movk_i32 s4, 0x7b -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ 
-10397,17 +10413,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX11-NEXT: s_movk_i32 s4, 0x7b -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10428,16 +10443,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 
2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10462,16 +10477,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10492,16 +10507,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 42 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; 
GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -10522,17 +10537,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 42 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10553,16 +10567,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10587,18 +10601,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -10620,18 +10634,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX10-NEXT: s_movk_i32 s4, 0x7b -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -10653,19 +10667,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo ; GFX11-NEXT: s_movk_i32 s4, 0x7b -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -10687,18 +10700,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -10724,20 +10737,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -10761,20 +10774,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -10798,21 +10811,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -10836,20 +10849,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -10878,22 +10891,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; 
GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -10917,22 +10930,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -10956,23 +10969,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 3 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -10996,22 +11008,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -11039,24 +11051,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi ; GFX9-NEXT: 
s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -11082,24 +11094,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 +; GFX10-NEXT: s_mov_b32 s8, 1 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -11125,25 +11137,24 @@ 
define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 -; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-NEXT: s_mov_b32 s8, 1 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6 +; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 @@ -11169,24 +11180,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 
1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -11219,28 +11230,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: v_writelane_b32 v40, 
s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 ; GFX9-NEXT: s_mov_b32 s11, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -11268,28 +11279,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-NEXT: s_mov_b32 s10, 3 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 +; GFX10-NEXT: s_mov_b32 s8, 1 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v4i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s9, 2 +; GFX10-NEXT: s_mov_b32 s10, 3 +; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -11317,29 +11328,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 -; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6 -; GFX11-NEXT: s_mov_b32 s10, 3 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 -; GFX11-NEXT: s_mov_b32 s11, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-NEXT: s_mov_b32 s8, 1 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s9, 2 +; GFX11-NEXT: s_mov_b32 s10, 3 +; GFX11-NEXT: s_mov_b32 s11, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -11367,28 +11377,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -11422,16 +11432,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -11452,16 +11462,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX10-NEXT: s_movk_i32 s4, 0x4400 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 
s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -11482,17 +11492,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX11-NEXT: s_movk_i32 s4, 0x4400 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -11513,16 +11522,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -11547,16 +11556,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -11577,16 +11586,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 4.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, 
v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -11607,17 +11616,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 4.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -11638,16 +11646,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -11672,18 +11680,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -11705,18 +11713,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; 
GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -11738,19 +11746,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -11772,18 +11779,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -11809,20 +11816,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -11845,20 +11852,20 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 4.0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 3 -; GFX10-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -11881,21 +11888,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 3 +; GFX11-NEXT: v_writelane_b32 v40, s31, 4 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 4.0 
-; GFX11-NEXT: v_writelane_b32 v40, s30, 3 -; GFX11-NEXT: v_writelane_b32 v40, s31, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3 +; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 @@ -11918,20 +11924,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -11958,12 +11964,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 7 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 @@ -11971,11 +11979,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 4.0 ; GFX9-NEXT: s_mov_b32 s7, -1.0 ; GFX9-NEXT: s_mov_b32 s8, 0.5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -12000,24 +12006,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_writelane_b32 v40, s6, 
2 ; GFX10-NEXT: s_mov_b32 s6, 4.0 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, -1.0 -; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 0.5 -; GFX10-NEXT: v_writelane_b32 v40, s30, 5 -; GFX10-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -12042,25 +12048,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s8, 4 +; GFX11-NEXT: v_writelane_b32 v40, s30, 5 +; GFX11-NEXT: v_writelane_b32 v40, s31, 6 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 4.0 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, -1.0 -; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 0.5 -; GFX11-NEXT: v_writelane_b32 v40, s30, 5 -; GFX11-NEXT: v_writelane_b32 v40, s31, 6 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5 +; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: 
v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 @@ -12085,24 +12090,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -12131,18 +12136,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, 
s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12164,18 +12169,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12197,19 +12202,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded 
Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12231,18 +12235,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, 
v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -12268,22 +12272,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -12307,22 +12311,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi ; GFX10-NEXT: 
s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 0 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -12346,23 +12350,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 0 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; 
GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -12386,22 +12389,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -12429,6 +12432,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; 
GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -12436,6 +12440,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 @@ -12444,11 +12449,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -12474,26 +12477,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-NEXT: v_writelane_b32 v40, s30, 6 +; GFX10-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; 
GFX10-NEXT: s_mov_b32 s6, 0 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 0 -; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 6 -; GFX10-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -12519,27 +12522,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s8, 4 +; GFX11-NEXT: v_writelane_b32 v40, s9, 5 +; GFX11-NEXT: v_writelane_b32 v40, s30, 6 +; GFX11-NEXT: v_writelane_b32 v40, s31, 7 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 0 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 0 -; GFX11-NEXT: v_writelane_b32 v40, s9, 5 ; GFX11-NEXT: s_mov_b32 s9, 0x40200000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 6 -; GFX11-NEXT: v_writelane_b32 v40, s31, 7 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6 +; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 @@ -12565,26 +12567,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -12614,16 +12616,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -12644,16 +12646,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: 
v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -12674,17 +12676,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -12705,16 +12707,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -12740,17 +12742,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12772,17 +12774,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v3i16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12804,18 +12806,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12837,17 +12839,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -12874,17 +12876,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12906,17 +12908,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; 
GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12938,18 +12940,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12971,17 +12973,17 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13008,18 +13010,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; 
GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13041,18 +13043,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 3 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13074,19 +13076,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0x20001 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 3 -; GFX11-NEXT: v_writelane_b32 v40, 
s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13108,18 +13109,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13145,18 +13146,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 
v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13178,18 +13179,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_movk_i32 s5, 0x4400 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13211,19 +13212,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; 
GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_movk_i32 s5, 0x4400 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13245,18 +13245,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13282,17 +13282,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13314,17 +13314,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13346,18 +13346,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13379,17 +13379,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13416,18 +13416,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13449,18 +13449,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; 
GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40003 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13482,19 +13482,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 0x20001 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 0x40003 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13516,18 +13515,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 
0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13553,16 +13552,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, 
v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -13583,16 +13582,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -13613,17 +13612,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: 
v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -13644,16 +13643,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -13679,17 +13678,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo -; 
GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13711,17 +13710,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13743,18 +13742,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX11-NEXT: 
s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13776,17 +13775,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13813,18 +13812,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; 
GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13846,18 +13845,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13879,19 +13878,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13913,18 +13911,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 
2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13950,20 +13948,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -13986,20 +13984,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; 
GFX10-NEXT: s_mov_b32 s5, 4 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 5 -; GFX10-NEXT: v_writelane_b32 v40, s30, 3 -; GFX10-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -14022,21 +14020,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 3 +; GFX11-NEXT: v_writelane_b32 v40, s31, 4 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 3 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 4 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 5 -; GFX11-NEXT: v_writelane_b32 v40, s30, 3 -; GFX11-NEXT: v_writelane_b32 v40, s31, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3 +; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 @@ -14059,20 +14056,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -14099,22 +14096,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; 
GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14138,22 +14135,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 4 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 5 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 6 -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14177,23 +14174,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: 
v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 3 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 4 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 5 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 6 -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14217,22 +14213,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 -; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14260,19 +14256,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14296,19 +14292,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; 
GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14332,20 +14328,20 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; 
GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14369,19 +14365,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14410,22 +14406,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, 
external_void_func_v4i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14449,22 +14445,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14488,23 +14484,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; 
GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 3 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14528,22 +14523,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14571,12 +14566,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 7 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 @@ -14584,11 +14581,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, 
v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -14613,24 +14608,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 5 -; GFX10-NEXT: v_writelane_b32 v40, s30, 5 -; GFX10-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -14655,25 +14650,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s8, 4 +; GFX11-NEXT: v_writelane_b32 v40, 
s30, 5 +; GFX11-NEXT: v_writelane_b32 v40, s31, 6 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 3 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 4 -; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 5 -; GFX11-NEXT: v_writelane_b32 v40, s30, 5 -; GFX11-NEXT: v_writelane_b32 v40, s31, 6 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5 +; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 @@ -14698,24 +14692,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -14744,25 +14738,25 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; 
GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -14790,9 +14784,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -14800,15 +14793,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 8 -; GFX10-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -14836,9 +14830,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, 
s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -14846,16 +14839,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 8 -; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -14883,9 +14877,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -14893,15 +14886,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -14935,6 +14929,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -14944,6 +14939,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 @@ -14954,11 +14950,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s9, 6 ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -14986,30 +14980,30 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 3 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 5 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: s_mov_b32 s9, 6 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-NEXT: s_mov_b32 s10, 7 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: s_mov_b32 s7, 4 +; GFX10-NEXT: s_mov_b32 s8, 5 +; GFX10-NEXT: s_mov_b32 s9, 6 +; GFX10-NEXT: s_mov_b32 s10, 7 +; GFX10-NEXT: s_mov_b32 s11, 8 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; 
GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -15037,31 +15031,30 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s8, 4 +; GFX11-NEXT: v_writelane_b32 v40, s9, 5 +; GFX11-NEXT: v_writelane_b32 v40, s10, 6 +; GFX11-NEXT: v_writelane_b32 v40, s11, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX11-NEXT: s_mov_b32 s4, 1 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_mov_b32 s5, 2 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 3 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 4 -; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 5 -; GFX11-NEXT: v_writelane_b32 v40, s9, 5 ; GFX11-NEXT: s_mov_b32 s9, 6 -; GFX11-NEXT: v_writelane_b32 v40, s10, 6 ; GFX11-NEXT: s_mov_b32 s10, 7 -; GFX11-NEXT: v_writelane_b32 v40, s11, 7 ; GFX11-NEXT: s_mov_b32 s11, 8 -; GFX11-NEXT: v_writelane_b32 v40, s30, 8 -; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -15089,30 +15082,30 @@ define amdgpu_gfx void 
@test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 
; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -15144,6 +15137,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 18 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15155,22 +15149,21 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s12, 8 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13 @@ -15206,9 +15199,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 18 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: 
s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -15224,15 +15216,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13 @@ -15268,9 +15261,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 18 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -15286,16 +15278,17 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; 
GFX11-NEXT: v_writelane_b32 v40, s30, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 17 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 17 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 17 ; GFX11-NEXT: v_readlane_b32 s30, v40, 16 +; GFX11-NEXT: v_readlane_b32 s31, v40, 17 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13 @@ -15331,9 +15324,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -15349,15 +15341,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v16i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 @@ -15399,6 +15392,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 28 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15415,23 +15409,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 
0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: v_mov_b32_e32 v3, s49 @@ -15440,11 +15437,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 ; GFX9-NEXT: s_mov_b32 s22, s38 @@ -15455,11 +15449,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -15505,9 +15498,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; 
GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -15523,29 +15515,40 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 ; GFX10-NEXT: s_mov_b32 s24, s40 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: 
s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -15554,19 +15557,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -15612,11 +15605,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s2, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -15632,44 +15622,45 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; 
GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; GFX11-NEXT: v_writelane_b32 v40, s20, 16 +; GFX11-NEXT: v_writelane_b32 v40, s21, 17 +; GFX11-NEXT: v_writelane_b32 v40, s22, 18 +; GFX11-NEXT: v_writelane_b32 v40, s23, 19 +; GFX11-NEXT: v_writelane_b32 v40, s24, 20 +; GFX11-NEXT: v_writelane_b32 v40, s25, 21 +; GFX11-NEXT: v_writelane_b32 v40, s26, 22 +; GFX11-NEXT: v_writelane_b32 v40, s27, 23 +; GFX11-NEXT: v_writelane_b32 v40, s28, 24 +; GFX11-NEXT: v_writelane_b32 v40, s29, 25 +; GFX11-NEXT: v_writelane_b32 v40, s30, 26 +; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_add_i32 s2, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s20, 16 -; GFX11-NEXT: v_writelane_b32 v40, s21, 17 -; GFX11-NEXT: v_writelane_b32 v40, s22, 18 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51 -; GFX11-NEXT: v_writelane_b32 v40, s23, 19 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47 ; GFX11-NEXT: v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49 -; GFX11-NEXT: v_writelane_b32 v40, s24, 20 ; GFX11-NEXT: s_mov_b32 s20, s36 ; GFX11-NEXT: s_mov_b32 s21, s37 ; GFX11-NEXT: s_mov_b32 s22, s38 ; GFX11-NEXT: s_mov_b32 s23, s39 -; GFX11-NEXT: v_writelane_b32 v40, s25, 21 ; GFX11-NEXT: s_mov_b32 s24, s40 ; GFX11-NEXT: s_mov_b32 s25, s41 -; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: v_writelane_b32 v40, s26, 22 ; GFX11-NEXT: s_mov_b32 s26, s42 -; GFX11-NEXT: v_writelane_b32 v40, s27, 23 ; GFX11-NEXT: s_mov_b32 s27, s43 -; GFX11-NEXT: 
v_writelane_b32 v40, s28, 24 ; GFX11-NEXT: s_mov_b32 s28, s44 -; GFX11-NEXT: v_writelane_b32 v40, s29, 25 ; GFX11-NEXT: s_mov_b32 s29, s45 -; GFX11-NEXT: v_writelane_b32 v40, s30, 26 -; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26 +; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23 @@ -15715,10 +15706,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -15734,46 +15723,48 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s28, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -15825,6 +15816,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 28 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15840,41 +15832,41 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: v_writelane_b32 v40, s25, 21 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0 ; GFX9-NEXT: ; kill: killed 
$sgpr34_sgpr35 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s24, 20 -; GFX9-NEXT: v_writelane_b32 v40, s25, 21 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s52 -; GFX9-NEXT: v_writelane_b32 v40, s27, 23 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, s49 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 @@ -15886,11 +15878,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; 
GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -15936,9 +15927,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 @@ -15954,6 +15944,19 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 @@ -15963,46 +15966,34 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s52 ; 
GFX10-NEXT: v_mov_b32_e32 v1, s47 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s24, s40 ; GFX10-NEXT: s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -16048,11 +16039,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, 
v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s3, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 @@ -16068,6 +16056,20 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; GFX11-NEXT: v_writelane_b32 v40, s20, 16 +; GFX11-NEXT: v_writelane_b32 v40, s21, 17 +; GFX11-NEXT: v_writelane_b32 v40, s22, 18 +; GFX11-NEXT: v_writelane_b32 v40, s23, 19 +; GFX11-NEXT: v_writelane_b32 v40, s24, 20 +; GFX11-NEXT: v_writelane_b32 v40, s25, 21 +; GFX11-NEXT: v_writelane_b32 v40, s26, 22 +; GFX11-NEXT: v_writelane_b32 v40, s27, 23 +; GFX11-NEXT: v_writelane_b32 v40, s28, 24 +; GFX11-NEXT: v_writelane_b32 v40, s29, 25 +; GFX11-NEXT: v_writelane_b32 v40, s30, 26 +; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_add_i32 s3, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x2 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -16075,41 +16077,28 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s20, 16 -; GFX11-NEXT: v_writelane_b32 v40, s21, 17 -; GFX11-NEXT: v_writelane_b32 v40, s22, 18 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51 -; GFX11-NEXT: v_writelane_b32 v40, 
s23, 19 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49 -; GFX11-NEXT: v_writelane_b32 v40, s24, 20 ; GFX11-NEXT: v_mov_b32_e32 v2, s48 ; GFX11-NEXT: s_add_i32 s2, s32, 24 ; GFX11-NEXT: s_mov_b32 s20, s36 ; GFX11-NEXT: s_mov_b32 s21, s37 -; GFX11-NEXT: v_writelane_b32 v40, s25, 21 ; GFX11-NEXT: s_mov_b32 s22, s38 ; GFX11-NEXT: s_mov_b32 s23, s39 ; GFX11-NEXT: s_mov_b32 s24, s40 ; GFX11-NEXT: s_mov_b32 s25, s41 -; GFX11-NEXT: v_writelane_b32 v40, s26, 22 ; GFX11-NEXT: s_mov_b32 s26, s42 -; GFX11-NEXT: scratch_store_b32 off, v6, s2 -; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: v_writelane_b32 v40, s27, 23 ; GFX11-NEXT: s_mov_b32 s27, s43 -; GFX11-NEXT: v_writelane_b32 v40, s28, 24 ; GFX11-NEXT: s_mov_b32 s28, s44 -; GFX11-NEXT: v_writelane_b32 v40, s29, 25 ; GFX11-NEXT: s_mov_b32 s29, s45 -; GFX11-NEXT: v_writelane_b32 v40, s30, 26 -; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: scratch_store_b32 off, v6, s2 +; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26 +; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23 @@ -16155,10 +16144,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 @@ -16174,6 +16161,20 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x2 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -16183,43 +16184,31 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -16272,20 +16261,20 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: 
v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -16305,21 +16294,21 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16339,17 +16328,17 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16369,17 +16358,17 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16404,7 +16393,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 @@ -16412,7 +16403,6 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo @@ -16448,10 +16438,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v29, 9 ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -16471,12 +16460,14 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 
0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 12 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 ; GFX10-NEXT: v_mov_b32_e32 v2, 14 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 @@ -16485,7 +16476,6 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: v_mov_b32_e32 v6, 2 ; GFX10-NEXT: v_mov_b32_e32 v7, 2 @@ -16515,10 +16505,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16538,15 +16527,16 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13 ; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1 +; GFX11-NEXT: v_dual_mov_b32 v6, 2 
:: v_dual_mov_b32 v7, 2 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1 -; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2 ; GFX11-NEXT: v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3 ; GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3 ; GFX11-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4 @@ -16561,11 +16551,10 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16585,21 +16574,22 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: 
v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3 @@ -16626,10 +16616,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16666,7 +16655,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 9 @@ -16682,7 +16673,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo @@ -16718,10 +16708,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 
v29, 5 ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -16741,19 +16730,21 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-NEXT: v_mov_b32_e32 v2, 10 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 14 +; GFX10-NEXT: v_mov_b32_e32 v4, 15 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 11 ; GFX10-NEXT: v_mov_b32_e32 v1, 12 ; GFX10-NEXT: v_mov_b32_e32 v2, 13 -; GFX10-NEXT: v_mov_b32_e32 v4, 15 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 @@ -16764,7 +16755,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: v_mov_b32_e32 v6, 1 ; GFX10-NEXT: v_mov_b32_e32 v7, 1 ; GFX10-NEXT: v_mov_b32_e32 v8, 1 @@ -16793,10 +16783,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi ; 
GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16816,12 +16805,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 9 ; GFX11-NEXT: v_dual_mov_b32 v2, 10 :: v_dual_mov_b32 v3, 11 ; GFX11-NEXT: v_dual_mov_b32 v4, 12 :: v_dual_mov_b32 v5, 13 ; GFX11-NEXT: v_dual_mov_b32 v6, 14 :: v_dual_mov_b32 v7, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 @@ -16843,11 +16833,10 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16867,6 +16856,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 10 @@ -16875,8 +16867,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 13 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 14 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 15 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0 @@ -16914,10 +16904,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16950,7 +16939,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000 @@ -16966,7 +16957,6 @@ define amdgpu_gfx void 
@stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo @@ -17002,10 +16992,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17025,19 +17014,21 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 +; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000 -; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 
GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 @@ -17048,7 +17039,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v6, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v7, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v8, 1.0 @@ -17077,10 +17067,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17100,6 +17089,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41200000 @@ -17108,8 +17100,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41500000 ; GFX11-NEXT: v_mov_b32_e32 v6, 0x41600000 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41700000 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 @@ -17132,11 +17122,10 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: v_mov_b32_e32 v31, 
0x40e00000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17156,6 +17145,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41200000 @@ -17164,8 +17156,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41500000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41600000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41700000 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0 @@ -17203,10 +17193,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17240,13 +17229,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17267,13 +17256,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17294,14 +17283,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17322,13 +17311,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17354,13 +17343,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17381,13 +17370,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17408,14 +17397,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX11-NEXT: 
s_mov_b32 s0, external_void_func_v1bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17436,13 +17425,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17468,13 +17457,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: 
s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17495,13 +17484,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17522,14 +17511,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 
s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17550,13 +17539,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17582,13 +17571,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17609,13 +17598,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 
{ ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17636,14 +17625,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17664,13 +17653,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: 
s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17696,13 +17685,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17723,13 +17712,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 
s35, external_void_func_v4bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17750,14 +17739,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17778,13 +17767,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo 
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17810,13 +17799,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17837,13 +17826,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; 
GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17864,14 +17853,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17892,13 +17881,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17924,13 
+17913,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17951,13 +17940,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17978,14 +17967,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX11-NEXT: 
s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18006,13 +17995,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18038,13 +18027,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo 
; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18065,13 +18054,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18092,14 +18081,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18120,13 +18109,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18152,13 +18141,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18179,13 +18168,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18206,14 +18195,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18234,13 +18223,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18266,13 +18255,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18293,13 +18282,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18320,14 +18309,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18348,13 +18337,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18380,13 +18369,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18407,13 +18396,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18434,14 +18423,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18462,13 +18451,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18494,13 +18483,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18521,13 +18510,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18548,14 +18537,14 @@ define amdgpu_gfx void 
@test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18576,13 +18565,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18608,13 +18597,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> 
inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18635,13 +18624,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18662,14 +18651,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18690,13 +18679,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18722,13 +18711,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 
s35, external_void_func_v16bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18749,13 +18738,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18776,14 +18765,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; 
GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18804,13 +18793,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 77c34b69820ce..576b481ca4ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -15,19 +15,19 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 
s5, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -49,19 +49,19 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -83,20 +83,20 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX11-NEXT: 
s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -130,8 +130,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s31, v0, 3 ; GFX9-NEXT: v_readlane_b32 s30, v0, 2 +; GFX9-NEXT: v_readlane_b32 s31, v0, 3 ; GFX9-NEXT: v_readlane_b32 s29, v0, 1 ; GFX9-NEXT: v_readlane_b32 s28, v0, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 @@ -157,8 +157,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s31, v0, 3 ; GFX10-NEXT: v_readlane_b32 s30, v0, 2 +; GFX10-NEXT: v_readlane_b32 s31, v0, 3 ; GFX10-NEXT: v_readlane_b32 s29, v0, 1 ; GFX10-NEXT: v_readlane_b32 s28, v0, 0 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -185,8 +185,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX11-NEXT: ; clobber ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v0, 3 ; GFX11-NEXT: v_readlane_b32 s30, v0, 2 +; GFX11-NEXT: v_readlane_b32 s31, v0, 3 ; GFX11-NEXT: v_readlane_b32 s29, v0, 1 ; GFX11-NEXT: 
v_readlane_b32 s28, v0, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -209,12 +209,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND @@ -224,8 +224,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -246,12 +246,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s31 ; GFX10-NEXT: ;;#ASMEND @@ -261,8 +261,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr 
addrspace(1) ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -283,12 +283,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s31 ; GFX11-NEXT: ;;#ASMEND @@ -298,8 +298,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s31 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -325,12 +325,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 2 -; GFX9-NEXT: v_writelane_b32 v41, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: v_writelane_b32 v41, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND @@ -341,8 +341,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -362,12 +362,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 -; GFX10-NEXT: v_writelane_b32 v41, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v31 ; GFX10-NEXT: ;;#ASMEND @@ -378,8 +378,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: 
v_readlane_b32 s34, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -399,12 +399,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 -; GFX11-NEXT: v_writelane_b32 v41, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: v_writelane_b32 v41, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v31 ; GFX11-NEXT: ;;#ASMEND @@ -416,8 +416,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX11-NEXT: ; use v31 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: v_readlane_b32 s30, v41, 0 +; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -443,23 +443,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s33 ; 
GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -480,23 +480,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: s_mov_b32 s4, s33 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -517,24 +517,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s33 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: s_mov_b32 s4, s33 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_mov_b32 s33, s4 +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s33 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -560,23 +559,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, s34 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s34, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -597,23 +596,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr 
addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s34, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -634,24 +633,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s34 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: s_mov_b32 s4, s34 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: s_mov_b32 s34, s4 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s34 ; GFX11-NEXT: ;;#ASMEND -; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -677,12 +675,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 2 -; GFX9-NEXT: v_writelane_b32 v41, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: v_writelane_b32 v41, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND @@ -691,8 +689,8 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -712,12 +710,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 -; GFX10-NEXT: v_writelane_b32 v41, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 
; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND @@ -726,8 +724,8 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -747,12 +745,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 -; GFX11-NEXT: v_writelane_b32 v41, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: v_writelane_b32 v41, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v40 ; GFX11-NEXT: ;;#ASMEND @@ -761,8 +759,8 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX11-NEXT: ; use v40 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: v_readlane_b32 s30, v41, 0 +; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 2 ; 
GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -844,13 +842,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -871,13 +869,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -898,14 +896,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi -; GFX11-NEXT: 
s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -929,13 +927,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -956,13 +954,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; 
GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -983,14 +981,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1013,22 +1011,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 
s4, s40 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -1049,22 +1047,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: s_mov_b32 s4, s40 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -1085,23 +1083,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: 
;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: s_mov_b32 s4, s40 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s4 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -1127,13 +1124,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v41, s4, 0 ; GFX9-NEXT: v_writelane_b32 v41, s30, 1 +; GFX9-NEXT: v_writelane_b32 v41, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: v_writelane_b32 v41, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND @@ -1150,8 +1147,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 2 ; GFX9-NEXT: v_readlane_b32 s30, v41, 1 +; GFX9-NEXT: v_readlane_b32 s31, v41, 2 ; GFX9-NEXT: v_readlane_b32 s4, v41, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 3 @@ -1172,12 +1169,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 
exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 3 -; GFX10-NEXT: v_writelane_b32 v41, s4, 0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v41, s4, 0 ; GFX10-NEXT: v_writelane_b32 v41, s30, 1 +; GFX10-NEXT: v_writelane_b32 v41, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -1186,7 +1184,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v40, v32 -; GFX10-NEXT: v_writelane_b32 v41, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 @@ -1195,8 +1192,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 2 ; GFX10-NEXT: v_readlane_b32 s30, v41, 1 +; GFX10-NEXT: v_readlane_b32 s31, v41, 2 ; GFX10-NEXT: v_readlane_b32 s4, v41, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v41, 3 @@ -1217,12 +1214,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 3 -; GFX11-NEXT: v_writelane_b32 v41, s4, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v41, s4, 0 ; GFX11-NEXT: v_writelane_b32 
v41, s30, 1 +; GFX11-NEXT: v_writelane_b32 v41, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND @@ -1231,7 +1229,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: ; def v32 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v40, v32 -; GFX11-NEXT: v_writelane_b32 v41, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s4 @@ -1240,8 +1237,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: ; use v40 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 2 ; GFX11-NEXT: v_readlane_b32 s30, v41, 1 +; GFX11-NEXT: v_readlane_b32 s31, v41, 2 ; GFX11-NEXT: v_readlane_b32 s4, v41, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 3110bbae6101c..891c6e37185d3 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -29,13 +29,13 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_i1@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -54,13 +54,13 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_i1@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -79,14 +79,14 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_i1@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_i1@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_i1@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i1@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -131,13 +131,13 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, 
return_i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -156,13 +156,13 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -181,14 +181,14 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; 
GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -227,13 +227,13 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -252,13 +252,13 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -277,14 +277,14 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: 
s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_2xi16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_2xi16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_2xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_2xi16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -331,13 +331,13 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -356,13 +356,13 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, 
return_3xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -381,14 +381,14 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_3xi16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_3xi16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v2, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_3xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_3xi16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v2, 1 ; GFX11-NEXT: v_readlane_b32 s30, v2, 0 +; GFX11-NEXT: v_readlane_b32 s31, v2, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -680,9 +680,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v100, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x2400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill @@ -716,7 +713,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: 
buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v100, s30, 0 ; GFX9-NEXT: v_writelane_b32 v100, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_dword v95, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -750,8 +750,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v100, 1 ; GFX9-NEXT: v_readlane_b32 s30, v100, 0 +; GFX9-NEXT: v_readlane_b32 s31, v100, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -769,9 +769,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v100, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x1200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill @@ -805,7 +802,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; 
GFX10-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v100, s30, 0 ; GFX10-NEXT: v_writelane_b32 v100, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1f ; GFX10-NEXT: buffer_load_dword v95, off, s[0:3], s33 @@ -840,8 +840,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:116 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:120 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:124 -; GFX10-NEXT: v_readlane_b32 s31, v100, 1 ; GFX10-NEXT: v_readlane_b32 s30, v100, 0 +; GFX10-NEXT: v_readlane_b32 s31, v100, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -859,44 +859,76 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v100, s33 offset:128 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v100, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_100xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_100xi32@abs32@lo ; GFX11-NEXT: s_addk_i32 s32, 0x90 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:104 +; GFX11-NEXT: ; 
meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s33 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s33 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s33 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s33 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s33 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s33 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s33 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s33 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s33 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s33 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s33 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s33 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s33 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s33 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s33 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v92, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s33 +; GFX11-NEXT: v_writelane_b32 v100, s30, 0 ; GFX11-NEXT: v_writelane_b32 v100, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_100xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_100xi32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_load_b32 v95, off, s33 @@ -931,8 +963,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:116 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:120 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:124 -; GFX11-NEXT: v_readlane_b32 s31, v100, 1 ; GFX11-NEXT: v_readlane_b32 s30, v100, 0 +; GFX11-NEXT: v_readlane_b32 s31, v100, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v100, off, s33 offset:128 ; 4-byte Folded Reload @@ -2145,14 +2177,14 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-NEXT: s_mov_b32 s38, s34 ; GFX9-NEXT: s_mov_b32 s34, s32 ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 +; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi ; GFX9-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s38 ; GFX9-NEXT: s_xor_saveexec_b64 s[36:37], -1 @@ -2175,14 +2207,14 @@ define amdgpu_gfx void 
@call_512xi32() #0 { ; GFX10-NEXT: s_mov_b32 s38, s34 ; GFX10-NEXT: s_mov_b32 s34, s32 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 +; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi ; GFX10-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo -; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: s_mov_b32 s34, s38 ; GFX10-NEXT: s_xor_saveexec_b32 s36, -1 @@ -2206,15 +2238,15 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11-NEXT: s_mov_b32 s36, s34 ; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: v_writelane_b32 v5, s30, 0 +; GFX11-NEXT: s_addk_i32 s32, 0x1800 +; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, s33 ; GFX11-NEXT: s_mov_b32 s1, return_512xi32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, return_512xi32@abs32@lo -; GFX11-NEXT: s_addk_i32 s32, 0x1800 -; GFX11-NEXT: v_writelane_b32 v5, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v5, 1 ; GFX11-NEXT: v_readlane_b32 s30, v5, 0 +; GFX11-NEXT: v_readlane_b32 s31, v5, 1 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s36 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -2520,17 +2552,29 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:204 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 ; GFX11-NEXT: s_clause 0x11 ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 @@ -2640,7 +2684,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: s_mov_b32 s38, s34 ; GFX9-NEXT: s_mov_b32 s34, s32 ; GFX9-NEXT: s_add_i32 s32, s32, 0x28000 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -2656,6 +2699,9 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s30, 0 +; GFX9-NEXT: v_writelane_b32 v63, s31, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -2698,7 +2744,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 ; GFX9-NEXT: s_mov_b32 s37, return_72xi32@abs32@hi ; GFX9-NEXT: s_mov_b32 s36, return_72xi32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 0x200, v0 @@ -2733,7 +2778,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: v_mov_b32_e32 v29, 0 ; GFX9-NEXT: v_mov_b32_e32 v30, 0 ; GFX9-NEXT: v_mov_b32_e32 v31, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:636 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:640 @@ -2889,8 +2933,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 ; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s31, v63, 1 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s38 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2912,7 +2956,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: s_mov_b32 exec_lo, s36 ; GFX10-NEXT: s_mov_b32 s38, s34 ; GFX10-NEXT: s_mov_b32 s34, s32 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x14000 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill @@ -2930,7 +2973,12 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: 
buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v63, s30, 0 +; GFX10-NEXT: v_writelane_b32 v63, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -2973,14 +3021,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v6, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_mov_b32_e32 v10, 0 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 @@ -3006,7 +3051,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: v_mov_b32_e32 v31, 0 ; GFX10-NEXT: s_mov_b32 s37, return_72xi32@abs32@hi ; GFX10-NEXT: s_mov_b32 s36, return_72xi32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v63, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX10-NEXT: s_clause 0x28 ; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:636 @@ -3167,8 +3211,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 -; GFX10-NEXT: v_readlane_b32 s31, v63, 1 ; GFX10-NEXT: v_readlane_b32 s30, v63, 0 +; GFX10-NEXT: v_readlane_b32 s31, v63, 1 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: 
s_mov_b32 s34, s38 ; GFX10-NEXT: s_or_saveexec_b32 s36, -1 @@ -3191,27 +3235,40 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_mov_b32 s36, s34 ; GFX11-NEXT: s_mov_b32 s34, s32 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: s_mov_b32 s1, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: s_addk_i32 s32, 0xa00 ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s33 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 +; GFX11-NEXT: v_writelane_b32 v60, s31, 1 +; GFX11-NEXT: s_mov_b32 s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: s_mov_b32 s1, s0 +; GFX11-NEXT: s_mov_b32 s2, s0 +; GFX11-NEXT: s_mov_b32 s3, s0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: 
s_add_i32 s0, s32, 0xa0 ; GFX11-NEXT: s_add_i32 s1, s32, 0x90 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 @@ -3232,7 +3289,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 ; GFX11-NEXT: s_add_i32 s2, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 @@ -3253,7 +3309,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 ; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v60, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 @@ -3365,8 +3421,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:36 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:40 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:44 -; GFX11-NEXT: v_readlane_b32 s31, v60, 1 ; GFX11-NEXT: v_readlane_b32 s30, v60, 0 +; GFX11-NEXT: v_readlane_b32 s31, v60, 1 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s36 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 diff --git a/llvm/test/CodeGen/AMDGPU/global-alias.ll b/llvm/test/CodeGen/AMDGPU/global-alias.ll index d8df20eb69452..4c7bef4aec091 100644 --- a/llvm/test/CodeGen/AMDGPU/global-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/global-alias.ll @@ -35,8 +35,8 @@ define void @bar() { ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; 
CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 10d61deed71cc..424aaaea11722 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -9,28 +9,30 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v6, s30, 0 -; CHECK-NEXT: v_writelane_b32 v6, s31, 1 -; CHECK-NEXT: v_writelane_b32 v6, s36, 2 -; CHECK-NEXT: v_writelane_b32 v6, s37, 3 -; CHECK-NEXT: v_writelane_b32 v6, s38, 4 -; CHECK-NEXT: v_writelane_b32 v6, s39, 5 -; CHECK-NEXT: v_writelane_b32 v6, s48, 6 -; CHECK-NEXT: v_writelane_b32 v6, s49, 7 -; CHECK-NEXT: v_writelane_b32 v6, s50, 8 -; CHECK-NEXT: v_writelane_b32 v6, s51, 9 -; CHECK-NEXT: v_writelane_b32 v6, s52, 10 -; CHECK-NEXT: v_writelane_b32 v6, s53, 11 -; CHECK-NEXT: v_writelane_b32 v6, s54, 12 -; CHECK-NEXT: v_writelane_b32 v6, s55, 13 -; CHECK-NEXT: v_writelane_b32 v6, s64, 14 -; CHECK-NEXT: v_writelane_b32 v6, s65, 15 -; CHECK-NEXT: v_writelane_b32 v6, s66, 16 -; CHECK-NEXT: v_writelane_b32 v6, s67, 17 -; CHECK-NEXT: v_writelane_b32 v6, s68, 18 +; CHECK-NEXT: v_writelane_b32 v6, s36, 0 +; CHECK-NEXT: v_writelane_b32 v6, s37, 1 +; CHECK-NEXT: v_writelane_b32 v6, s38, 2 +; CHECK-NEXT: v_writelane_b32 v6, s39, 3 +; CHECK-NEXT: v_writelane_b32 v6, s48, 4 +; CHECK-NEXT: v_writelane_b32 v6, s49, 5 +; CHECK-NEXT: v_writelane_b32 v6, s50, 6 +; CHECK-NEXT: v_writelane_b32 v6, s51, 7 +; CHECK-NEXT: v_writelane_b32 v6, s52, 8 +; CHECK-NEXT: v_writelane_b32 v6, s53, 9 +; CHECK-NEXT: v_writelane_b32 v6, s54, 10 +; CHECK-NEXT: v_writelane_b32 v6, s55, 11 +; CHECK-NEXT: v_writelane_b32 v6, s64, 12 +; CHECK-NEXT: v_writelane_b32 v6, s65, 
13 +; CHECK-NEXT: v_writelane_b32 v6, s66, 14 +; CHECK-NEXT: v_writelane_b32 v6, s67, 15 +; CHECK-NEXT: v_writelane_b32 v6, s68, 16 +; CHECK-NEXT: v_writelane_b32 v6, s69, 17 +; CHECK-NEXT: v_writelane_b32 v6, s70, 18 +; CHECK-NEXT: v_writelane_b32 v6, s71, 19 +; CHECK-NEXT: v_writelane_b32 v6, s30, 20 +; CHECK-NEXT: v_writelane_b32 v6, s31, 21 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: v_writelane_b32 v6, s69, 19 ; CHECK-NEXT: s_mov_b32 s68, 0 ; CHECK-NEXT: s_mov_b32 s69, s4 ; CHECK-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 @@ -40,11 +42,11 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130 ; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane -; CHECK-NEXT: v_writelane_b32 v6, s70, 20 -; CHECK-NEXT: v_writelane_b32 v6, s71, 21 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0 +; CHECK-NEXT: s_mov_b32 s70, s68 ; CHECK-NEXT: v_writelane_b32 v7, s8, 0 ; CHECK-NEXT: v_writelane_b32 v7, s9, 1 ; CHECK-NEXT: v_writelane_b32 v7, s10, 2 @@ -77,9 +79,7 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v7, s65, 29 ; CHECK-NEXT: v_writelane_b32 v7, s66, 30 ; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0 -; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0 ; CHECK-NEXT: s_mov_b32 s69, s68 -; CHECK-NEXT: s_mov_b32 s70, s68 ; CHECK-NEXT: s_mov_b32 s71, s68 ; CHECK-NEXT: v_writelane_b32 v7, s67, 31 ; CHECK-NEXT: image_sample_lz v3, v[1:2], s[60:67], s[68:71] dmask:0x1 @@ -225,29 +225,29 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] -; CHECK-NEXT: v_readlane_b32 s71, v6, 21 -; CHECK-NEXT: v_readlane_b32 s70, v6, 20 -; CHECK-NEXT: v_readlane_b32 s69, v6, 19 -; CHECK-NEXT: v_readlane_b32 s68, v6, 18 -; CHECK-NEXT: 
v_readlane_b32 s67, v6, 17 -; CHECK-NEXT: v_readlane_b32 s66, v6, 16 -; CHECK-NEXT: v_readlane_b32 s65, v6, 15 -; CHECK-NEXT: v_readlane_b32 s64, v6, 14 -; CHECK-NEXT: v_readlane_b32 s55, v6, 13 -; CHECK-NEXT: v_readlane_b32 s54, v6, 12 -; CHECK-NEXT: v_readlane_b32 s53, v6, 11 -; CHECK-NEXT: v_readlane_b32 s52, v6, 10 +; CHECK-NEXT: v_readlane_b32 s30, v6, 20 +; CHECK-NEXT: v_readlane_b32 s31, v6, 21 +; CHECK-NEXT: v_readlane_b32 s71, v6, 19 +; CHECK-NEXT: v_readlane_b32 s70, v6, 18 +; CHECK-NEXT: v_readlane_b32 s69, v6, 17 +; CHECK-NEXT: v_readlane_b32 s68, v6, 16 +; CHECK-NEXT: v_readlane_b32 s67, v6, 15 +; CHECK-NEXT: v_readlane_b32 s66, v6, 14 +; CHECK-NEXT: v_readlane_b32 s65, v6, 13 +; CHECK-NEXT: v_readlane_b32 s64, v6, 12 +; CHECK-NEXT: v_readlane_b32 s55, v6, 11 +; CHECK-NEXT: v_readlane_b32 s54, v6, 10 +; CHECK-NEXT: v_readlane_b32 s53, v6, 9 +; CHECK-NEXT: v_readlane_b32 s52, v6, 8 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_readlane_b32 s51, v6, 9 -; CHECK-NEXT: v_readlane_b32 s50, v6, 8 -; CHECK-NEXT: v_readlane_b32 s49, v6, 7 -; CHECK-NEXT: v_readlane_b32 s48, v6, 6 -; CHECK-NEXT: v_readlane_b32 s39, v6, 5 -; CHECK-NEXT: v_readlane_b32 s38, v6, 4 -; CHECK-NEXT: v_readlane_b32 s37, v6, 3 -; CHECK-NEXT: v_readlane_b32 s36, v6, 2 -; CHECK-NEXT: v_readlane_b32 s31, v6, 1 -; CHECK-NEXT: v_readlane_b32 s30, v6, 0 +; CHECK-NEXT: v_readlane_b32 s51, v6, 7 +; CHECK-NEXT: v_readlane_b32 s50, v6, 6 +; CHECK-NEXT: v_readlane_b32 s49, v6, 5 +; CHECK-NEXT: v_readlane_b32 s48, v6, 4 +; CHECK-NEXT: v_readlane_b32 s39, v6, 3 +; CHECK-NEXT: v_readlane_b32 s38, v6, 2 +; CHECK-NEXT: v_readlane_b32 s37, v6, 1 +; CHECK-NEXT: v_readlane_b32 s36, v6, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 
a208cfdb197af..2aaaff1ecc407 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -128,24 +128,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -175,24 +175,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: 
s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -212,24 +212,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: 
v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -259,24 +259,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; 
GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -300,24 +300,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: 
v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -350,24 +350,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: 
v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -387,24 +387,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: 
v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -435,24 +435,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: 
v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -476,24 +476,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; 
GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -525,24 +525,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 
s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -562,24 +562,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -611,24 +611,24 @@ define i32 
@test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -653,26 +653,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 20 ; GCN-NEXT: s_addk_i32 s32, 
0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 -; GCN-NEXT: v_writelane_b32 v40, s66, 18 -; GCN-NEXT: v_writelane_b32 v40, s67, 19 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s66, 16 +; GCN-NEXT: v_writelane_b32 v40, s67, 17 +; GCN-NEXT: v_writelane_b32 v40, s30, 18 +; GCN-NEXT: v_writelane_b32 v40, s31, 19 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -709,26 +709,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: s_mov_b64 exec, s[64:65] ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s67, v40, 
19 -; GCN-NEXT: v_readlane_b32 s66, v40, 18 -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 18 +; GCN-NEXT: v_readlane_b32 s31, v40, 19 +; GCN-NEXT: v_readlane_b32 s67, v40, 17 +; GCN-NEXT: v_readlane_b32 s66, v40, 16 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 20 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -748,26 +748,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 20 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; 
GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 -; GISEL-NEXT: v_writelane_b32 v40, s66, 18 -; GISEL-NEXT: v_writelane_b32 v40, s67, 19 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s66, 16 +; GISEL-NEXT: v_writelane_b32 v40, s67, 17 +; GISEL-NEXT: v_writelane_b32 v40, s30, 18 +; GISEL-NEXT: v_writelane_b32 v40, s31, 19 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -804,26 +804,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b64 exec, s[64:65] ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s67, v40, 
19 -; GISEL-NEXT: v_readlane_b32 s66, v40, 18 -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 18 +; GISEL-NEXT: v_readlane_b32 s31, v40, 19 +; GISEL-NEXT: v_readlane_b32 s67, v40, 17 +; GISEL-NEXT: v_readlane_b32 s66, v40, 16 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 20 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -853,22 +853,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 
s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 @@ -882,22 +882,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 
s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -915,22 +915,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: 
v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0 @@ -944,22 +944,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: 
v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -982,22 +982,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v41, s30, 0 -; GCN-NEXT: v_writelane_b32 v41, s31, 1 -; GCN-NEXT: v_writelane_b32 v41, s34, 2 -; GCN-NEXT: v_writelane_b32 v41, s35, 3 -; GCN-NEXT: v_writelane_b32 v41, s36, 4 -; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s48, 8 -; GCN-NEXT: v_writelane_b32 v41, s49, 9 -; GCN-NEXT: v_writelane_b32 v41, s50, 10 -; GCN-NEXT: v_writelane_b32 v41, s51, 11 -; GCN-NEXT: v_writelane_b32 v41, s52, 12 -; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s54, 14 -; GCN-NEXT: v_writelane_b32 v41, s55, 15 +; GCN-NEXT: v_writelane_b32 v41, s34, 0 +; GCN-NEXT: v_writelane_b32 v41, s35, 1 +; GCN-NEXT: v_writelane_b32 v41, s36, 2 +; GCN-NEXT: v_writelane_b32 v41, s37, 3 +; GCN-NEXT: v_writelane_b32 v41, s38, 4 +; GCN-NEXT: v_writelane_b32 v41, s39, 5 +; GCN-NEXT: v_writelane_b32 v41, s48, 6 +; GCN-NEXT: v_writelane_b32 v41, s49, 7 +; GCN-NEXT: v_writelane_b32 v41, s50, 8 +; GCN-NEXT: v_writelane_b32 v41, s51, 9 +; GCN-NEXT: v_writelane_b32 v41, s52, 10 +; GCN-NEXT: v_writelane_b32 v41, s53, 11 +; GCN-NEXT: v_writelane_b32 v41, s54, 12 +; 
GCN-NEXT: v_writelane_b32 v41, s55, 13 +; GCN-NEXT: v_writelane_b32 v41, s30, 14 +; GCN-NEXT: v_writelane_b32 v41, s31, 15 ; GCN-NEXT: v_mov_b32_e32 v40, v0 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1013,22 +1013,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v40 -; GCN-NEXT: v_readlane_b32 s55, v41, 15 -; GCN-NEXT: v_readlane_b32 s54, v41, 14 -; GCN-NEXT: v_readlane_b32 s53, v41, 13 -; GCN-NEXT: v_readlane_b32 s52, v41, 12 -; GCN-NEXT: v_readlane_b32 s51, v41, 11 -; GCN-NEXT: v_readlane_b32 s50, v41, 10 -; GCN-NEXT: v_readlane_b32 s49, v41, 9 -; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 -; GCN-NEXT: v_readlane_b32 s37, v41, 5 -; GCN-NEXT: v_readlane_b32 s36, v41, 4 -; GCN-NEXT: v_readlane_b32 s35, v41, 3 -; GCN-NEXT: v_readlane_b32 s34, v41, 2 -; GCN-NEXT: v_readlane_b32 s31, v41, 1 -; GCN-NEXT: v_readlane_b32 s30, v41, 0 +; GCN-NEXT: v_readlane_b32 s30, v41, 14 +; GCN-NEXT: v_readlane_b32 s31, v41, 15 +; GCN-NEXT: v_readlane_b32 s55, v41, 13 +; GCN-NEXT: v_readlane_b32 s54, v41, 12 +; GCN-NEXT: v_readlane_b32 s53, v41, 11 +; GCN-NEXT: v_readlane_b32 s52, v41, 10 +; GCN-NEXT: v_readlane_b32 s51, v41, 9 +; GCN-NEXT: v_readlane_b32 s50, v41, 8 +; GCN-NEXT: v_readlane_b32 s49, v41, 7 +; GCN-NEXT: v_readlane_b32 s48, v41, 6 +; GCN-NEXT: v_readlane_b32 s39, v41, 5 +; GCN-NEXT: v_readlane_b32 s38, v41, 4 +; GCN-NEXT: v_readlane_b32 s37, v41, 3 +; GCN-NEXT: v_readlane_b32 s36, v41, 2 +; GCN-NEXT: v_readlane_b32 s35, v41, 1 +; GCN-NEXT: v_readlane_b32 s34, v41, 0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1048,22 +1048,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: s_mov_b64 
exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v41, s30, 0 -; GISEL-NEXT: v_writelane_b32 v41, s31, 1 -; GISEL-NEXT: v_writelane_b32 v41, s34, 2 -; GISEL-NEXT: v_writelane_b32 v41, s35, 3 -; GISEL-NEXT: v_writelane_b32 v41, s36, 4 -; GISEL-NEXT: v_writelane_b32 v41, s37, 5 -; GISEL-NEXT: v_writelane_b32 v41, s38, 6 -; GISEL-NEXT: v_writelane_b32 v41, s39, 7 -; GISEL-NEXT: v_writelane_b32 v41, s48, 8 -; GISEL-NEXT: v_writelane_b32 v41, s49, 9 -; GISEL-NEXT: v_writelane_b32 v41, s50, 10 -; GISEL-NEXT: v_writelane_b32 v41, s51, 11 -; GISEL-NEXT: v_writelane_b32 v41, s52, 12 -; GISEL-NEXT: v_writelane_b32 v41, s53, 13 -; GISEL-NEXT: v_writelane_b32 v41, s54, 14 -; GISEL-NEXT: v_writelane_b32 v41, s55, 15 +; GISEL-NEXT: v_writelane_b32 v41, s34, 0 +; GISEL-NEXT: v_writelane_b32 v41, s35, 1 +; GISEL-NEXT: v_writelane_b32 v41, s36, 2 +; GISEL-NEXT: v_writelane_b32 v41, s37, 3 +; GISEL-NEXT: v_writelane_b32 v41, s38, 4 +; GISEL-NEXT: v_writelane_b32 v41, s39, 5 +; GISEL-NEXT: v_writelane_b32 v41, s48, 6 +; GISEL-NEXT: v_writelane_b32 v41, s49, 7 +; GISEL-NEXT: v_writelane_b32 v41, s50, 8 +; GISEL-NEXT: v_writelane_b32 v41, s51, 9 +; GISEL-NEXT: v_writelane_b32 v41, s52, 10 +; GISEL-NEXT: v_writelane_b32 v41, s53, 11 +; GISEL-NEXT: v_writelane_b32 v41, s54, 12 +; GISEL-NEXT: v_writelane_b32 v41, s55, 13 +; GISEL-NEXT: v_writelane_b32 v41, s30, 14 +; GISEL-NEXT: v_writelane_b32 v41, s31, 15 ; GISEL-NEXT: v_mov_b32_e32 v40, v0 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1079,22 +1079,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v40 -; GISEL-NEXT: v_readlane_b32 s55, v41, 15 -; GISEL-NEXT: v_readlane_b32 s54, v41, 14 -; GISEL-NEXT: v_readlane_b32 s53, v41, 13 -; GISEL-NEXT: v_readlane_b32 
s52, v41, 12 -; GISEL-NEXT: v_readlane_b32 s51, v41, 11 -; GISEL-NEXT: v_readlane_b32 s50, v41, 10 -; GISEL-NEXT: v_readlane_b32 s49, v41, 9 -; GISEL-NEXT: v_readlane_b32 s48, v41, 8 -; GISEL-NEXT: v_readlane_b32 s39, v41, 7 -; GISEL-NEXT: v_readlane_b32 s38, v41, 6 -; GISEL-NEXT: v_readlane_b32 s37, v41, 5 -; GISEL-NEXT: v_readlane_b32 s36, v41, 4 -; GISEL-NEXT: v_readlane_b32 s35, v41, 3 -; GISEL-NEXT: v_readlane_b32 s34, v41, 2 -; GISEL-NEXT: v_readlane_b32 s31, v41, 1 -; GISEL-NEXT: v_readlane_b32 s30, v41, 0 +; GISEL-NEXT: v_readlane_b32 s30, v41, 14 +; GISEL-NEXT: v_readlane_b32 s31, v41, 15 +; GISEL-NEXT: v_readlane_b32 s55, v41, 13 +; GISEL-NEXT: v_readlane_b32 s54, v41, 12 +; GISEL-NEXT: v_readlane_b32 s53, v41, 11 +; GISEL-NEXT: v_readlane_b32 s52, v41, 10 +; GISEL-NEXT: v_readlane_b32 s51, v41, 9 +; GISEL-NEXT: v_readlane_b32 s50, v41, 8 +; GISEL-NEXT: v_readlane_b32 s49, v41, 7 +; GISEL-NEXT: v_readlane_b32 s48, v41, 6 +; GISEL-NEXT: v_readlane_b32 s39, v41, 5 +; GISEL-NEXT: v_readlane_b32 s38, v41, 4 +; GISEL-NEXT: v_readlane_b32 s37, v41, 3 +; GISEL-NEXT: v_readlane_b32 s36, v41, 2 +; GISEL-NEXT: v_readlane_b32 s35, v41, 1 +; GISEL-NEXT: v_readlane_b32 s34, v41, 0 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1121,22 +1121,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, 
s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 @@ -1152,22 +1152,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 
s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1185,22 +1185,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: 
v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1 @@ -1216,22 +1216,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: 
v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1254,22 +1254,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s6, v0 @@ -1282,22 +1282,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; 
%bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1315,22 +1315,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: 
v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0 @@ -1343,22 +1343,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; 
GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index c3f391786f878..3be6682bc4ffa 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -25,18 +25,17 @@ define void @f0() { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v4, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v4, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, f1@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, f1@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v4, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v4, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v4, 1 ; GFX11-NEXT: v_readlane_b32 s30, v4, 0 +; 
GFX11-NEXT: v_readlane_b32 s31, v4, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v4, off, s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll index 5a344c8ee37f9..e6d93d857d5a0 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll @@ -7,7 +7,7 @@ define fastcc i32 @foo() { ; CHECK-LABEL: name: foo ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 @@ -221,25 +221,26 @@ define fastcc i32 @foo() { ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32 ; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17 ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr40, 2, 32 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 
killed $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr127, 0, 32, $vgpr127, 1, 32 ; CHECK-NEXT: BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $scc, implicit-def $sgpr17 { ; CHECK-NEXT: $sgpr16_sgpr17 = S_GETPC_B64 ; CHECK-NEXT: $sgpr16 = S_ADD_U32 internal $sgpr16, target-flags(amdgpu-gotprel32-lo) @bar + 4, implicit-def $scc ; CHECK-NEXT: $sgpr17 = S_ADDC_U32 internal $sgpr17, target-flags(amdgpu-gotprel32-hi) @bar + 12, implicit-def $scc, implicit internal $scc ; CHECK-NEXT: } - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40 - ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) - ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; CHECK-NEXT: BUFFER_GL1_INV implicit $exec ; CHECK-NEXT: BUFFER_GL0_INV implicit $exec - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40 + ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) + ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} @@ -250,12 +251,12 @@ define fastcc i32 @foo() { ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit 
$vcc_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.DummyReturnBlock: + ; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0, implicit-def $sgpr30_sgpr31 ; CHECK-NEXT: $sgpr31 = V_READLANE_B32 $vgpr40, 1 - ; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0 ; CHECK-NEXT: $sgpr32 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr40, 2 ; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5 ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 killed $sgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll index d1ba892d7f7e1..2f4d5ee3cbce5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll @@ -984,10 +984,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 -; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 -; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 @@ -1004,6 +1000,10 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: 
v_writelane_b32 v31, s65, 13 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 +; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 +; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 @@ -1070,10 +1070,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 -; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 @@ -1089,6 +1085,10 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 @@ -1429,10 +1429,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; 
CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 -; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 -; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 @@ -1449,6 +1445,10 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 +; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 +; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 @@ -1515,10 +1515,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 -; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 @@ -1534,6 +1530,10 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 ; 
CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index f971080e02c5b..72c4397754ce6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -2375,6 +2375,12 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 @@ -2402,12 +2408,6 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; 
GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) ; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index dfd67873c3b86..526988d1f36ac 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -2375,6 +2375,12 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 @@ -2402,12 +2408,6 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload 
Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) ; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index 4b5a7c207055a..52671f5d3deb4 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -17,8 +17,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART @@ -46,8 +46,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART @@ -74,8 +74,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 @@ -108,9 +109,8 @@ define void 
@scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -139,9 +139,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -168,6 +168,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -175,7 +176,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -196,13 +196,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: 
v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -397,10 +397,10 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -433,9 +433,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -467,9 +467,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; 
GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -501,8 +501,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 -; GFX11-NEXT: s_add_i32 s0, s33, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s0, s33, 64 +; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 @@ -511,7 +512,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc @@ -539,8 +540,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s33 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -574,16 +575,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; 
GFX8-NEXT: s_movk_i32 s55, 0x4040 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 -; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_readfirstlane_b32 s55, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART @@ -608,6 +609,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -615,8 +618,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -642,13 +643,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s33, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -681,8 +682,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; 
GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 @@ -705,8 +706,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 @@ -728,13 +729,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc @@ -804,9 +804,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 -; GFX900-NEXT: v_writelane_b32 v0, s55, 0 
; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -827,11 +827,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc @@ -989,8 +989,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 @@ -1018,9 +1018,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 @@ -1047,9 +1047,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, 
s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 @@ -1076,8 +1076,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 -; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 @@ -1109,13 +1109,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 @@ -1136,11 +1137,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX8-NEXT: s_mov_b32 s55, 64 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 -; GFX8-NEXT: s_add_i32 s32, s32, 
0x102000 ; GFX8-NEXT: v_readfirstlane_b32 s55, v1 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART @@ -1165,10 +1166,10 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 -; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -1194,11 +1195,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc @@ -1228,8 +1229,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: s_add_i32 s55, s55, 64 @@ -1255,8 +1256,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: 
buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: s_add_i32 s55, s55, 64 @@ -1281,8 +1282,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_add_i32 s1, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_mov_b32 s55, s1 @@ -1311,14 +1312,15 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_mov_b32 s32, s33 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 +; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -1390,8 +1392,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 -; GFX942-NEXT: s_add_i32 s1, s33, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_add_i32 s1, s33, 64 ; GFX942-NEXT: s_mov_b32 s55, 
s1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 @@ -1529,8 +1531,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 +; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: s_add_i32 s55, s4, 0x442c ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 @@ -1556,8 +1558,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: s_add_i32 s55, s4, 0x442c ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 @@ -1677,8 +1679,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: s_add_i32 s1, s32, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s1, s32, 64 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-NEXT: s_add_i32 s55, s32, s0 @@ -1796,8 +1798,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 ; GFX942-NEXT: s_add_i32 s55, s32, s0 ; GFX942-NEXT: s_addk_i32 s55, 0x4040 ; GFX942-NEXT: s_add_i32 s0, s32, 64 diff 
--git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index 17581bcb61e99..b0fee0fe0aa19 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -37,26 +37,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v23, s30, 0 -; GFX7-NEXT: v_writelane_b32 v23, s31, 1 -; GFX7-NEXT: v_writelane_b32 v23, s33, 2 -; GFX7-NEXT: v_writelane_b32 v23, s34, 3 -; GFX7-NEXT: v_writelane_b32 v23, s35, 4 -; GFX7-NEXT: v_writelane_b32 v23, s36, 5 -; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: v_writelane_b32 v23, s48, 9 -; GFX7-NEXT: v_writelane_b32 v23, s49, 10 -; GFX7-NEXT: v_writelane_b32 v23, s50, 11 -; GFX7-NEXT: v_writelane_b32 v23, s51, 12 -; GFX7-NEXT: v_writelane_b32 v23, s52, 13 -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 +; GFX7-NEXT: v_writelane_b32 v23, s33, 0 +; GFX7-NEXT: v_writelane_b32 v23, s34, 1 +; GFX7-NEXT: v_writelane_b32 v23, s35, 2 +; GFX7-NEXT: v_writelane_b32 v23, s36, 3 +; GFX7-NEXT: v_writelane_b32 v23, s37, 4 +; GFX7-NEXT: v_writelane_b32 v23, s38, 5 +; GFX7-NEXT: v_writelane_b32 v23, s39, 6 +; GFX7-NEXT: v_writelane_b32 v23, s48, 7 +; GFX7-NEXT: v_writelane_b32 v23, s49, 8 +; GFX7-NEXT: v_writelane_b32 v23, s50, 9 +; GFX7-NEXT: v_writelane_b32 v23, s51, 10 +; GFX7-NEXT: v_writelane_b32 v23, s52, 11 +; GFX7-NEXT: v_writelane_b32 v23, s53, 12 +; GFX7-NEXT: v_writelane_b32 v23, s54, 13 +; GFX7-NEXT: v_writelane_b32 v23, s55, 14 +; GFX7-NEXT: v_writelane_b32 v23, s30, 15 +; GFX7-NEXT: v_writelane_b32 v23, s31, 16 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: 
v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -73,23 +73,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v23, 16 -; GFX7-NEXT: v_readlane_b32 s54, v23, 15 -; GFX7-NEXT: v_readlane_b32 s53, v23, 14 -; GFX7-NEXT: v_readlane_b32 s52, v23, 13 -; GFX7-NEXT: v_readlane_b32 s51, v23, 12 -; GFX7-NEXT: v_readlane_b32 s50, v23, 11 -; GFX7-NEXT: v_readlane_b32 s49, v23, 10 -; GFX7-NEXT: v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 -; GFX7-NEXT: v_readlane_b32 s37, v23, 6 -; GFX7-NEXT: v_readlane_b32 s36, v23, 5 -; GFX7-NEXT: v_readlane_b32 s35, v23, 4 -; GFX7-NEXT: v_readlane_b32 s34, v23, 3 -; GFX7-NEXT: v_readlane_b32 s33, v23, 2 -; GFX7-NEXT: v_readlane_b32 s31, v23, 1 -; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: v_readlane_b32 s30, v23, 15 +; GFX7-NEXT: v_readlane_b32 s31, v23, 16 +; GFX7-NEXT: v_readlane_b32 s55, v23, 14 +; GFX7-NEXT: v_readlane_b32 s54, v23, 13 +; GFX7-NEXT: v_readlane_b32 s53, v23, 12 +; GFX7-NEXT: v_readlane_b32 s52, v23, 11 +; GFX7-NEXT: v_readlane_b32 s51, v23, 10 +; GFX7-NEXT: v_readlane_b32 s50, v23, 9 +; GFX7-NEXT: v_readlane_b32 s49, v23, 8 +; GFX7-NEXT: v_readlane_b32 s48, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 6 +; GFX7-NEXT: v_readlane_b32 s38, v23, 5 +; GFX7-NEXT: v_readlane_b32 s37, v23, 4 +; GFX7-NEXT: v_readlane_b32 s36, v23, 3 +; GFX7-NEXT: v_readlane_b32 s35, v23, 2 +; GFX7-NEXT: v_readlane_b32 s34, v23, 1 +; GFX7-NEXT: v_readlane_b32 s33, v23, 0 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte 
Folded Reload @@ -104,26 +104,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v23, s30, 0 -; GFX8-NEXT: v_writelane_b32 v23, s31, 1 -; GFX8-NEXT: v_writelane_b32 v23, s33, 2 -; GFX8-NEXT: v_writelane_b32 v23, s34, 3 -; GFX8-NEXT: v_writelane_b32 v23, s35, 4 -; GFX8-NEXT: v_writelane_b32 v23, s36, 5 -; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s38, 7 -; GFX8-NEXT: v_writelane_b32 v23, s39, 8 -; GFX8-NEXT: v_writelane_b32 v23, s48, 9 -; GFX8-NEXT: v_writelane_b32 v23, s49, 10 -; GFX8-NEXT: v_writelane_b32 v23, s50, 11 -; GFX8-NEXT: v_writelane_b32 v23, s51, 12 -; GFX8-NEXT: v_writelane_b32 v23, s52, 13 -; GFX8-NEXT: v_writelane_b32 v23, s53, 14 +; GFX8-NEXT: v_writelane_b32 v23, s33, 0 +; GFX8-NEXT: v_writelane_b32 v23, s34, 1 +; GFX8-NEXT: v_writelane_b32 v23, s35, 2 +; GFX8-NEXT: v_writelane_b32 v23, s36, 3 +; GFX8-NEXT: v_writelane_b32 v23, s37, 4 +; GFX8-NEXT: v_writelane_b32 v23, s38, 5 +; GFX8-NEXT: v_writelane_b32 v23, s39, 6 +; GFX8-NEXT: v_writelane_b32 v23, s48, 7 +; GFX8-NEXT: v_writelane_b32 v23, s49, 8 +; GFX8-NEXT: v_writelane_b32 v23, s50, 9 +; GFX8-NEXT: v_writelane_b32 v23, s51, 10 +; GFX8-NEXT: v_writelane_b32 v23, s52, 11 +; GFX8-NEXT: v_writelane_b32 v23, s53, 12 +; GFX8-NEXT: v_writelane_b32 v23, s54, 13 +; GFX8-NEXT: v_writelane_b32 v23, s55, 14 +; GFX8-NEXT: v_writelane_b32 v23, s30, 15 +; GFX8-NEXT: v_writelane_b32 v23, s31, 16 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -141,23 +141,23 @@ define void 
@scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v23, 16 -; GFX8-NEXT: v_readlane_b32 s54, v23, 15 -; GFX8-NEXT: v_readlane_b32 s53, v23, 14 -; GFX8-NEXT: v_readlane_b32 s52, v23, 13 -; GFX8-NEXT: v_readlane_b32 s51, v23, 12 -; GFX8-NEXT: v_readlane_b32 s50, v23, 11 -; GFX8-NEXT: v_readlane_b32 s49, v23, 10 -; GFX8-NEXT: v_readlane_b32 s48, v23, 9 -; GFX8-NEXT: v_readlane_b32 s39, v23, 8 -; GFX8-NEXT: v_readlane_b32 s38, v23, 7 -; GFX8-NEXT: v_readlane_b32 s37, v23, 6 -; GFX8-NEXT: v_readlane_b32 s36, v23, 5 -; GFX8-NEXT: v_readlane_b32 s35, v23, 4 -; GFX8-NEXT: v_readlane_b32 s34, v23, 3 -; GFX8-NEXT: v_readlane_b32 s33, v23, 2 -; GFX8-NEXT: v_readlane_b32 s31, v23, 1 -; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: v_readlane_b32 s30, v23, 15 +; GFX8-NEXT: v_readlane_b32 s31, v23, 16 +; GFX8-NEXT: v_readlane_b32 s55, v23, 14 +; GFX8-NEXT: v_readlane_b32 s54, v23, 13 +; GFX8-NEXT: v_readlane_b32 s53, v23, 12 +; GFX8-NEXT: v_readlane_b32 s52, v23, 11 +; GFX8-NEXT: v_readlane_b32 s51, v23, 10 +; GFX8-NEXT: v_readlane_b32 s50, v23, 9 +; GFX8-NEXT: v_readlane_b32 s49, v23, 8 +; GFX8-NEXT: v_readlane_b32 s48, v23, 7 +; GFX8-NEXT: v_readlane_b32 s39, v23, 6 +; GFX8-NEXT: v_readlane_b32 s38, v23, 5 +; GFX8-NEXT: v_readlane_b32 s37, v23, 4 +; GFX8-NEXT: v_readlane_b32 s36, v23, 3 +; GFX8-NEXT: v_readlane_b32 s35, v23, 2 +; GFX8-NEXT: v_readlane_b32 s34, v23, 1 +; GFX8-NEXT: v_readlane_b32 s33, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -172,26 +172,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte 
Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v23, s30, 0 -; GFX900-NEXT: v_writelane_b32 v23, s31, 1 -; GFX900-NEXT: v_writelane_b32 v23, s33, 2 -; GFX900-NEXT: v_writelane_b32 v23, s34, 3 -; GFX900-NEXT: v_writelane_b32 v23, s35, 4 -; GFX900-NEXT: v_writelane_b32 v23, s36, 5 -; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s38, 7 -; GFX900-NEXT: v_writelane_b32 v23, s39, 8 -; GFX900-NEXT: v_writelane_b32 v23, s48, 9 -; GFX900-NEXT: v_writelane_b32 v23, s49, 10 -; GFX900-NEXT: v_writelane_b32 v23, s50, 11 -; GFX900-NEXT: v_writelane_b32 v23, s51, 12 -; GFX900-NEXT: v_writelane_b32 v23, s52, 13 -; GFX900-NEXT: v_writelane_b32 v23, s53, 14 +; GFX900-NEXT: v_writelane_b32 v23, s33, 0 +; GFX900-NEXT: v_writelane_b32 v23, s34, 1 +; GFX900-NEXT: v_writelane_b32 v23, s35, 2 +; GFX900-NEXT: v_writelane_b32 v23, s36, 3 +; GFX900-NEXT: v_writelane_b32 v23, s37, 4 +; GFX900-NEXT: v_writelane_b32 v23, s38, 5 +; GFX900-NEXT: v_writelane_b32 v23, s39, 6 +; GFX900-NEXT: v_writelane_b32 v23, s48, 7 +; GFX900-NEXT: v_writelane_b32 v23, s49, 8 +; GFX900-NEXT: v_writelane_b32 v23, s50, 9 +; GFX900-NEXT: v_writelane_b32 v23, s51, 10 +; GFX900-NEXT: v_writelane_b32 v23, s52, 11 +; GFX900-NEXT: v_writelane_b32 v23, s53, 12 +; GFX900-NEXT: v_writelane_b32 v23, s54, 13 +; GFX900-NEXT: v_writelane_b32 v23, s55, 14 +; GFX900-NEXT: v_writelane_b32 v23, s30, 15 +; GFX900-NEXT: v_writelane_b32 v23, s31, 16 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -208,23 +208,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, 
s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v23, 16 -; GFX900-NEXT: v_readlane_b32 s54, v23, 15 -; GFX900-NEXT: v_readlane_b32 s53, v23, 14 -; GFX900-NEXT: v_readlane_b32 s52, v23, 13 -; GFX900-NEXT: v_readlane_b32 s51, v23, 12 -; GFX900-NEXT: v_readlane_b32 s50, v23, 11 -; GFX900-NEXT: v_readlane_b32 s49, v23, 10 -; GFX900-NEXT: v_readlane_b32 s48, v23, 9 -; GFX900-NEXT: v_readlane_b32 s39, v23, 8 -; GFX900-NEXT: v_readlane_b32 s38, v23, 7 -; GFX900-NEXT: v_readlane_b32 s37, v23, 6 -; GFX900-NEXT: v_readlane_b32 s36, v23, 5 -; GFX900-NEXT: v_readlane_b32 s35, v23, 4 -; GFX900-NEXT: v_readlane_b32 s34, v23, 3 -; GFX900-NEXT: v_readlane_b32 s33, v23, 2 -; GFX900-NEXT: v_readlane_b32 s31, v23, 1 -; GFX900-NEXT: v_readlane_b32 s30, v23, 0 +; GFX900-NEXT: v_readlane_b32 s30, v23, 15 +; GFX900-NEXT: v_readlane_b32 s31, v23, 16 +; GFX900-NEXT: v_readlane_b32 s55, v23, 14 +; GFX900-NEXT: v_readlane_b32 s54, v23, 13 +; GFX900-NEXT: v_readlane_b32 s53, v23, 12 +; GFX900-NEXT: v_readlane_b32 s52, v23, 11 +; GFX900-NEXT: v_readlane_b32 s51, v23, 10 +; GFX900-NEXT: v_readlane_b32 s50, v23, 9 +; GFX900-NEXT: v_readlane_b32 s49, v23, 8 +; GFX900-NEXT: v_readlane_b32 s48, v23, 7 +; GFX900-NEXT: v_readlane_b32 s39, v23, 6 +; GFX900-NEXT: v_readlane_b32 s38, v23, 5 +; GFX900-NEXT: v_readlane_b32 s37, v23, 4 +; GFX900-NEXT: v_readlane_b32 s36, v23, 3 +; GFX900-NEXT: v_readlane_b32 s35, v23, 2 +; GFX900-NEXT: v_readlane_b32 s34, v23, 1 +; GFX900-NEXT: v_readlane_b32 s33, v23, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -239,26 +239,27 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v23, s30, 0 -; GFX942-NEXT: 
v_writelane_b32 v23, s31, 1 -; GFX942-NEXT: v_writelane_b32 v23, s33, 2 -; GFX942-NEXT: v_writelane_b32 v23, s34, 3 -; GFX942-NEXT: v_writelane_b32 v23, s35, 4 -; GFX942-NEXT: v_writelane_b32 v23, s36, 5 -; GFX942-NEXT: v_writelane_b32 v23, s37, 6 -; GFX942-NEXT: v_writelane_b32 v23, s38, 7 -; GFX942-NEXT: v_writelane_b32 v23, s39, 8 -; GFX942-NEXT: v_writelane_b32 v23, s48, 9 -; GFX942-NEXT: v_writelane_b32 v23, s49, 10 -; GFX942-NEXT: v_writelane_b32 v23, s50, 11 -; GFX942-NEXT: v_writelane_b32 v23, s51, 12 -; GFX942-NEXT: v_writelane_b32 v23, s52, 13 -; GFX942-NEXT: v_writelane_b32 v23, s53, 14 +; GFX942-NEXT: v_writelane_b32 v23, s33, 0 +; GFX942-NEXT: v_writelane_b32 v23, s34, 1 +; GFX942-NEXT: v_writelane_b32 v23, s35, 2 +; GFX942-NEXT: v_writelane_b32 v23, s36, 3 +; GFX942-NEXT: v_writelane_b32 v23, s37, 4 +; GFX942-NEXT: v_writelane_b32 v23, s38, 5 +; GFX942-NEXT: v_writelane_b32 v23, s39, 6 +; GFX942-NEXT: v_writelane_b32 v23, s48, 7 +; GFX942-NEXT: v_writelane_b32 v23, s49, 8 +; GFX942-NEXT: v_writelane_b32 v23, s50, 9 +; GFX942-NEXT: v_writelane_b32 v23, s51, 10 +; GFX942-NEXT: v_writelane_b32 v23, s52, 11 +; GFX942-NEXT: v_writelane_b32 v23, s53, 12 +; GFX942-NEXT: v_writelane_b32 v23, s54, 13 +; GFX942-NEXT: v_writelane_b32 v23, s55, 14 +; GFX942-NEXT: v_writelane_b32 v23, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v23, s31, 16 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v23, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec -; GFX942-NEXT: v_writelane_b32 v23, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -273,23 +274,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v23, 16 -; GFX942-NEXT: 
v_readlane_b32 s54, v23, 15 -; GFX942-NEXT: v_readlane_b32 s53, v23, 14 -; GFX942-NEXT: v_readlane_b32 s52, v23, 13 -; GFX942-NEXT: v_readlane_b32 s51, v23, 12 -; GFX942-NEXT: v_readlane_b32 s50, v23, 11 -; GFX942-NEXT: v_readlane_b32 s49, v23, 10 -; GFX942-NEXT: v_readlane_b32 s48, v23, 9 -; GFX942-NEXT: v_readlane_b32 s39, v23, 8 -; GFX942-NEXT: v_readlane_b32 s38, v23, 7 -; GFX942-NEXT: v_readlane_b32 s37, v23, 6 -; GFX942-NEXT: v_readlane_b32 s36, v23, 5 -; GFX942-NEXT: v_readlane_b32 s35, v23, 4 -; GFX942-NEXT: v_readlane_b32 s34, v23, 3 -; GFX942-NEXT: v_readlane_b32 s33, v23, 2 -; GFX942-NEXT: v_readlane_b32 s31, v23, 1 -; GFX942-NEXT: v_readlane_b32 s30, v23, 0 +; GFX942-NEXT: v_readlane_b32 s30, v23, 15 +; GFX942-NEXT: v_readlane_b32 s31, v23, 16 +; GFX942-NEXT: v_readlane_b32 s55, v23, 14 +; GFX942-NEXT: v_readlane_b32 s54, v23, 13 +; GFX942-NEXT: v_readlane_b32 s53, v23, 12 +; GFX942-NEXT: v_readlane_b32 s52, v23, 11 +; GFX942-NEXT: v_readlane_b32 s51, v23, 10 +; GFX942-NEXT: v_readlane_b32 s50, v23, 9 +; GFX942-NEXT: v_readlane_b32 s49, v23, 8 +; GFX942-NEXT: v_readlane_b32 s48, v23, 7 +; GFX942-NEXT: v_readlane_b32 s39, v23, 6 +; GFX942-NEXT: v_readlane_b32 s38, v23, 5 +; GFX942-NEXT: v_readlane_b32 s37, v23, 4 +; GFX942-NEXT: v_readlane_b32 s36, v23, 3 +; GFX942-NEXT: v_readlane_b32 s35, v23, 2 +; GFX942-NEXT: v_readlane_b32 s34, v23, 1 +; GFX942-NEXT: v_readlane_b32 s33, v23, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload @@ -305,29 +306,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_1-NEXT: v_writelane_b32 v23, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v23, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 
v23, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v23, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v23, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v23, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v23, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v23, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v23, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v23, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v23, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v23, s54, 13 +; GFX10_1-NEXT: v_writelane_b32 v23, s55, 14 +; GFX10_1-NEXT: v_writelane_b32 v23, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v23, s31, 16 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 -; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12 -; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -338,23 +339,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; 
GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13 -; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v23, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v23, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v23, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v23, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v23, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v23, 11 +; GFX10_1-NEXT: v_readlane_b32 s51, v23, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v23, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v23, 6 +; GFX10_1-NEXT: v_readlane_b32 s38, v23, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v23, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v23, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v23, 2 +; GFX10_1-NEXT: v_readlane_b32 s34, v23, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v23, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload @@ -370,29 +371,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v23, 
s30, 0 +; GFX10_3-NEXT: v_writelane_b32 v23, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v23, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v23, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v23, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v23, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v23, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v23, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v23, s48, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v23, s51, 10 +; GFX10_3-NEXT: v_writelane_b32 v23, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v23, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v23, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v23, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v23, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v23, s31, 16 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -403,23 +404,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: 
;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15 -; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v23, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v23, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v23, 14 +; GFX10_3-NEXT: v_readlane_b32 s54, v23, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v23, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v23, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v23, 10 +; GFX10_3-NEXT: v_readlane_b32 s50, v23, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s48, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v23, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v23, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v23, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v23, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v23, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v23, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v23, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload @@ -434,59 +435,59 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_store_b32 off, 
v23, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v23, s30, 0 +; GFX11-NEXT: v_writelane_b32 v23, s33, 0 +; GFX11-NEXT: v_writelane_b32 v23, s34, 1 +; GFX11-NEXT: v_writelane_b32 v23, s35, 2 +; GFX11-NEXT: v_writelane_b32 v23, s36, 3 +; GFX11-NEXT: v_writelane_b32 v23, s37, 4 +; GFX11-NEXT: v_writelane_b32 v23, s38, 5 +; GFX11-NEXT: v_writelane_b32 v23, s39, 6 +; GFX11-NEXT: v_writelane_b32 v23, s48, 7 +; GFX11-NEXT: v_writelane_b32 v23, s49, 8 +; GFX11-NEXT: v_writelane_b32 v23, s50, 9 +; GFX11-NEXT: v_writelane_b32 v23, s51, 10 +; GFX11-NEXT: v_writelane_b32 v23, s52, 11 +; GFX11-NEXT: v_writelane_b32 v23, s53, 12 +; GFX11-NEXT: v_writelane_b32 v23, s54, 13 +; GFX11-NEXT: v_writelane_b32 v23, s55, 14 +; GFX11-NEXT: v_writelane_b32 v23, s30, 15 +; GFX11-NEXT: v_writelane_b32 v23, s31, 16 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v23, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v23, s33, 2 -; GFX11-NEXT: v_writelane_b32 v23, s34, 3 -; GFX11-NEXT: v_writelane_b32 v23, s35, 4 -; GFX11-NEXT: v_writelane_b32 v23, s36, 5 -; GFX11-NEXT: v_writelane_b32 v23, s37, 6 -; GFX11-NEXT: v_writelane_b32 v23, s38, 7 -; GFX11-NEXT: v_writelane_b32 v23, s39, 8 -; GFX11-NEXT: v_writelane_b32 v23, s48, 9 -; GFX11-NEXT: v_writelane_b32 v23, s49, 10 -; GFX11-NEXT: v_writelane_b32 v23, s50, 11 -; GFX11-NEXT: v_writelane_b32 v23, s51, 12 -; GFX11-NEXT: v_writelane_b32 v23, s52, 13 -; GFX11-NEXT: v_writelane_b32 v23, s53, 14 -; GFX11-NEXT: v_writelane_b32 v23, s54, 15 -; GFX11-NEXT: v_writelane_b32 v23, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; 
GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040 ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s59, 0 ; GFX11-NEXT: s_bitset0_b32 s59, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s54, s59 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s55, v23, 16 -; GFX11-NEXT: v_readlane_b32 s54, v23, 15 -; GFX11-NEXT: v_readlane_b32 s53, v23, 14 -; GFX11-NEXT: v_readlane_b32 s52, v23, 13 -; GFX11-NEXT: v_readlane_b32 s51, v23, 12 -; GFX11-NEXT: v_readlane_b32 s50, v23, 11 -; GFX11-NEXT: v_readlane_b32 s49, v23, 10 -; GFX11-NEXT: v_readlane_b32 s48, v23, 9 -; GFX11-NEXT: v_readlane_b32 s39, v23, 8 -; GFX11-NEXT: v_readlane_b32 s38, v23, 7 -; GFX11-NEXT: v_readlane_b32 s37, v23, 6 -; GFX11-NEXT: v_readlane_b32 s36, v23, 5 -; GFX11-NEXT: v_readlane_b32 s35, v23, 4 -; GFX11-NEXT: v_readlane_b32 s34, v23, 3 -; GFX11-NEXT: v_readlane_b32 s33, v23, 2 -; GFX11-NEXT: v_readlane_b32 s31, v23, 1 -; GFX11-NEXT: v_readlane_b32 s30, v23, 0 +; GFX11-NEXT: v_readlane_b32 s30, v23, 15 +; GFX11-NEXT: v_readlane_b32 s31, v23, 16 +; GFX11-NEXT: v_readlane_b32 s55, v23, 14 +; GFX11-NEXT: v_readlane_b32 s54, v23, 13 +; GFX11-NEXT: v_readlane_b32 s53, v23, 12 +; GFX11-NEXT: v_readlane_b32 s52, v23, 11 +; GFX11-NEXT: v_readlane_b32 s51, v23, 10 +; GFX11-NEXT: v_readlane_b32 s50, v23, 9 +; GFX11-NEXT: v_readlane_b32 s49, v23, 8 +; GFX11-NEXT: v_readlane_b32 s48, v23, 7 +; GFX11-NEXT: v_readlane_b32 s39, v23, 6 +; GFX11-NEXT: v_readlane_b32 s38, v23, 5 +; GFX11-NEXT: v_readlane_b32 s37, v23, 4 +; GFX11-NEXT: v_readlane_b32 s36, v23, 3 +; GFX11-NEXT: v_readlane_b32 s35, v23, 2 +; GFX11-NEXT: 
v_readlane_b32 s34, v23, 1 +; GFX11-NEXT: v_readlane_b32 s33, v23, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload @@ -505,28 +506,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v23, s30, 0 +; GFX12-NEXT: v_writelane_b32 v23, s33, 0 +; GFX12-NEXT: v_writelane_b32 v23, s34, 1 +; GFX12-NEXT: v_writelane_b32 v23, s35, 2 +; GFX12-NEXT: v_writelane_b32 v23, s36, 3 +; GFX12-NEXT: v_writelane_b32 v23, s37, 4 +; GFX12-NEXT: v_writelane_b32 v23, s38, 5 +; GFX12-NEXT: v_writelane_b32 v23, s39, 6 +; GFX12-NEXT: v_writelane_b32 v23, s48, 7 +; GFX12-NEXT: v_writelane_b32 v23, s49, 8 +; GFX12-NEXT: v_writelane_b32 v23, s50, 9 +; GFX12-NEXT: v_writelane_b32 v23, s51, 10 +; GFX12-NEXT: v_writelane_b32 v23, s52, 11 +; GFX12-NEXT: v_writelane_b32 v23, s53, 12 +; GFX12-NEXT: v_writelane_b32 v23, s54, 13 +; GFX12-NEXT: v_writelane_b32 v23, s55, 14 +; GFX12-NEXT: v_writelane_b32 v23, s30, 15 +; GFX12-NEXT: v_writelane_b32 v23, s31, 16 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v23, s31, 1 -; GFX12-NEXT: v_writelane_b32 v23, s33, 2 -; GFX12-NEXT: v_writelane_b32 v23, s34, 3 -; GFX12-NEXT: v_writelane_b32 v23, s35, 4 -; GFX12-NEXT: v_writelane_b32 v23, s36, 5 -; GFX12-NEXT: v_writelane_b32 v23, s37, 6 -; GFX12-NEXT: v_writelane_b32 v23, s38, 7 -; GFX12-NEXT: v_writelane_b32 v23, s39, 8 -; GFX12-NEXT: v_writelane_b32 v23, s48, 9 -; GFX12-NEXT: v_writelane_b32 v23, s49, 10 -; GFX12-NEXT: v_writelane_b32 v23, s50, 11 -; GFX12-NEXT: v_writelane_b32 v23, s51, 12 -; GFX12-NEXT: v_writelane_b32 v23, s52, 13 -; GFX12-NEXT: v_writelane_b32 
v23, s53, 14 -; GFX12-NEXT: v_writelane_b32 v23, s54, 15 -; GFX12-NEXT: v_writelane_b32 v23, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX12-NEXT: ;;#ASMEND @@ -540,23 +541,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s55, v23, 16 -; GFX12-NEXT: v_readlane_b32 s54, v23, 15 -; GFX12-NEXT: v_readlane_b32 s53, v23, 14 -; GFX12-NEXT: v_readlane_b32 s52, v23, 13 -; GFX12-NEXT: v_readlane_b32 s51, v23, 12 -; GFX12-NEXT: v_readlane_b32 s50, v23, 11 -; GFX12-NEXT: v_readlane_b32 s49, v23, 10 -; GFX12-NEXT: v_readlane_b32 s48, v23, 9 -; GFX12-NEXT: v_readlane_b32 s39, v23, 8 -; GFX12-NEXT: v_readlane_b32 s38, v23, 7 -; GFX12-NEXT: v_readlane_b32 s37, v23, 6 -; GFX12-NEXT: v_readlane_b32 s36, v23, 5 -; GFX12-NEXT: v_readlane_b32 s35, v23, 4 -; GFX12-NEXT: v_readlane_b32 s34, v23, 3 -; GFX12-NEXT: v_readlane_b32 s33, v23, 2 -; GFX12-NEXT: v_readlane_b32 s31, v23, 1 -; GFX12-NEXT: v_readlane_b32 s30, v23, 0 +; GFX12-NEXT: v_readlane_b32 s30, v23, 15 +; GFX12-NEXT: v_readlane_b32 s31, v23, 16 +; GFX12-NEXT: v_readlane_b32 s55, v23, 14 +; GFX12-NEXT: v_readlane_b32 s54, v23, 13 +; GFX12-NEXT: v_readlane_b32 s53, v23, 12 +; GFX12-NEXT: v_readlane_b32 s52, v23, 11 +; GFX12-NEXT: v_readlane_b32 s51, v23, 10 +; GFX12-NEXT: v_readlane_b32 s50, v23, 9 +; GFX12-NEXT: v_readlane_b32 s49, v23, 8 +; GFX12-NEXT: v_readlane_b32 s48, v23, 7 +; GFX12-NEXT: v_readlane_b32 s39, v23, 6 +; GFX12-NEXT: v_readlane_b32 s38, v23, 5 +; GFX12-NEXT: v_readlane_b32 s37, v23, 4 +; GFX12-NEXT: v_readlane_b32 s36, v23, 3 +; GFX12-NEXT: v_readlane_b32 s35, v23, 2 +; GFX12-NEXT: v_readlane_b32 s34, v23, 1 +; GFX12-NEXT: v_readlane_b32 s33, v23, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; 
GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -613,24 +614,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v21, s30, 0 -; GFX7-NEXT: v_writelane_b32 v21, s31, 1 -; GFX7-NEXT: v_writelane_b32 v21, s33, 2 -; GFX7-NEXT: v_writelane_b32 v21, s34, 3 -; GFX7-NEXT: v_writelane_b32 v21, s35, 4 -; GFX7-NEXT: v_writelane_b32 v21, s36, 5 -; GFX7-NEXT: v_writelane_b32 v21, s37, 6 -; GFX7-NEXT: v_writelane_b32 v21, s38, 7 -; GFX7-NEXT: v_writelane_b32 v21, s39, 8 -; GFX7-NEXT: v_writelane_b32 v21, s48, 9 -; GFX7-NEXT: v_writelane_b32 v21, s49, 10 -; GFX7-NEXT: v_writelane_b32 v21, s50, 11 -; GFX7-NEXT: v_writelane_b32 v21, s51, 12 -; GFX7-NEXT: v_writelane_b32 v21, s52, 13 -; GFX7-NEXT: v_writelane_b32 v21, s53, 14 -; GFX7-NEXT: v_writelane_b32 v21, s54, 15 +; GFX7-NEXT: v_writelane_b32 v21, s33, 0 +; GFX7-NEXT: v_writelane_b32 v21, s34, 1 +; GFX7-NEXT: v_writelane_b32 v21, s35, 2 +; GFX7-NEXT: v_writelane_b32 v21, s36, 3 +; GFX7-NEXT: v_writelane_b32 v21, s37, 4 +; GFX7-NEXT: v_writelane_b32 v21, s38, 5 +; GFX7-NEXT: v_writelane_b32 v21, s39, 6 +; GFX7-NEXT: v_writelane_b32 v21, s48, 7 +; GFX7-NEXT: v_writelane_b32 v21, s49, 8 +; GFX7-NEXT: v_writelane_b32 v21, s50, 9 +; GFX7-NEXT: v_writelane_b32 v21, s51, 10 +; GFX7-NEXT: v_writelane_b32 v21, s52, 11 +; GFX7-NEXT: v_writelane_b32 v21, s53, 12 +; GFX7-NEXT: v_writelane_b32 v21, s54, 13 +; GFX7-NEXT: v_writelane_b32 v21, s55, 14 +; GFX7-NEXT: v_writelane_b32 v21, s30, 15 +; GFX7-NEXT: v_writelane_b32 v21, s31, 16 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v21, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX7-NEXT: ;;#ASMEND @@ 
-640,23 +641,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v21, 16 -; GFX7-NEXT: v_readlane_b32 s54, v21, 15 -; GFX7-NEXT: v_readlane_b32 s53, v21, 14 -; GFX7-NEXT: v_readlane_b32 s52, v21, 13 -; GFX7-NEXT: v_readlane_b32 s51, v21, 12 -; GFX7-NEXT: v_readlane_b32 s50, v21, 11 -; GFX7-NEXT: v_readlane_b32 s49, v21, 10 -; GFX7-NEXT: v_readlane_b32 s48, v21, 9 -; GFX7-NEXT: v_readlane_b32 s39, v21, 8 -; GFX7-NEXT: v_readlane_b32 s38, v21, 7 -; GFX7-NEXT: v_readlane_b32 s37, v21, 6 -; GFX7-NEXT: v_readlane_b32 s36, v21, 5 -; GFX7-NEXT: v_readlane_b32 s35, v21, 4 -; GFX7-NEXT: v_readlane_b32 s34, v21, 3 -; GFX7-NEXT: v_readlane_b32 s33, v21, 2 -; GFX7-NEXT: v_readlane_b32 s31, v21, 1 -; GFX7-NEXT: v_readlane_b32 s30, v21, 0 +; GFX7-NEXT: v_readlane_b32 s30, v21, 15 +; GFX7-NEXT: v_readlane_b32 s31, v21, 16 +; GFX7-NEXT: v_readlane_b32 s55, v21, 14 +; GFX7-NEXT: v_readlane_b32 s54, v21, 13 +; GFX7-NEXT: v_readlane_b32 s53, v21, 12 +; GFX7-NEXT: v_readlane_b32 s52, v21, 11 +; GFX7-NEXT: v_readlane_b32 s51, v21, 10 +; GFX7-NEXT: v_readlane_b32 s50, v21, 9 +; GFX7-NEXT: v_readlane_b32 s49, v21, 8 +; GFX7-NEXT: v_readlane_b32 s48, v21, 7 +; GFX7-NEXT: v_readlane_b32 s39, v21, 6 +; GFX7-NEXT: v_readlane_b32 s38, v21, 5 +; GFX7-NEXT: v_readlane_b32 s37, v21, 4 +; GFX7-NEXT: v_readlane_b32 s36, v21, 3 +; GFX7-NEXT: v_readlane_b32 s35, v21, 2 +; GFX7-NEXT: v_readlane_b32 s34, v21, 1 +; GFX7-NEXT: v_readlane_b32 s33, v21, 0 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -671,24 +672,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_store_dword 
v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v21, s30, 0 -; GFX8-NEXT: v_writelane_b32 v21, s31, 1 -; GFX8-NEXT: v_writelane_b32 v21, s33, 2 -; GFX8-NEXT: v_writelane_b32 v21, s34, 3 -; GFX8-NEXT: v_writelane_b32 v21, s35, 4 -; GFX8-NEXT: v_writelane_b32 v21, s36, 5 -; GFX8-NEXT: v_writelane_b32 v21, s37, 6 -; GFX8-NEXT: v_writelane_b32 v21, s38, 7 -; GFX8-NEXT: v_writelane_b32 v21, s39, 8 -; GFX8-NEXT: v_writelane_b32 v21, s48, 9 -; GFX8-NEXT: v_writelane_b32 v21, s49, 10 -; GFX8-NEXT: v_writelane_b32 v21, s50, 11 -; GFX8-NEXT: v_writelane_b32 v21, s51, 12 -; GFX8-NEXT: v_writelane_b32 v21, s52, 13 -; GFX8-NEXT: v_writelane_b32 v21, s53, 14 -; GFX8-NEXT: v_writelane_b32 v21, s54, 15 +; GFX8-NEXT: v_writelane_b32 v21, s33, 0 +; GFX8-NEXT: v_writelane_b32 v21, s34, 1 +; GFX8-NEXT: v_writelane_b32 v21, s35, 2 +; GFX8-NEXT: v_writelane_b32 v21, s36, 3 +; GFX8-NEXT: v_writelane_b32 v21, s37, 4 +; GFX8-NEXT: v_writelane_b32 v21, s38, 5 +; GFX8-NEXT: v_writelane_b32 v21, s39, 6 +; GFX8-NEXT: v_writelane_b32 v21, s48, 7 +; GFX8-NEXT: v_writelane_b32 v21, s49, 8 +; GFX8-NEXT: v_writelane_b32 v21, s50, 9 +; GFX8-NEXT: v_writelane_b32 v21, s51, 10 +; GFX8-NEXT: v_writelane_b32 v21, s52, 11 +; GFX8-NEXT: v_writelane_b32 v21, s53, 12 +; GFX8-NEXT: v_writelane_b32 v21, s54, 13 +; GFX8-NEXT: v_writelane_b32 v21, s55, 14 +; GFX8-NEXT: v_writelane_b32 v21, s30, 15 +; GFX8-NEXT: v_writelane_b32 v21, s31, 16 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v21, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND @@ -699,23 +700,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v21, 
16 -; GFX8-NEXT: v_readlane_b32 s54, v21, 15 -; GFX8-NEXT: v_readlane_b32 s53, v21, 14 -; GFX8-NEXT: v_readlane_b32 s52, v21, 13 -; GFX8-NEXT: v_readlane_b32 s51, v21, 12 -; GFX8-NEXT: v_readlane_b32 s50, v21, 11 -; GFX8-NEXT: v_readlane_b32 s49, v21, 10 -; GFX8-NEXT: v_readlane_b32 s48, v21, 9 -; GFX8-NEXT: v_readlane_b32 s39, v21, 8 -; GFX8-NEXT: v_readlane_b32 s38, v21, 7 -; GFX8-NEXT: v_readlane_b32 s37, v21, 6 -; GFX8-NEXT: v_readlane_b32 s36, v21, 5 -; GFX8-NEXT: v_readlane_b32 s35, v21, 4 -; GFX8-NEXT: v_readlane_b32 s34, v21, 3 -; GFX8-NEXT: v_readlane_b32 s33, v21, 2 -; GFX8-NEXT: v_readlane_b32 s31, v21, 1 -; GFX8-NEXT: v_readlane_b32 s30, v21, 0 +; GFX8-NEXT: v_readlane_b32 s30, v21, 15 +; GFX8-NEXT: v_readlane_b32 s31, v21, 16 +; GFX8-NEXT: v_readlane_b32 s55, v21, 14 +; GFX8-NEXT: v_readlane_b32 s54, v21, 13 +; GFX8-NEXT: v_readlane_b32 s53, v21, 12 +; GFX8-NEXT: v_readlane_b32 s52, v21, 11 +; GFX8-NEXT: v_readlane_b32 s51, v21, 10 +; GFX8-NEXT: v_readlane_b32 s50, v21, 9 +; GFX8-NEXT: v_readlane_b32 s49, v21, 8 +; GFX8-NEXT: v_readlane_b32 s48, v21, 7 +; GFX8-NEXT: v_readlane_b32 s39, v21, 6 +; GFX8-NEXT: v_readlane_b32 s38, v21, 5 +; GFX8-NEXT: v_readlane_b32 s37, v21, 4 +; GFX8-NEXT: v_readlane_b32 s36, v21, 3 +; GFX8-NEXT: v_readlane_b32 s35, v21, 2 +; GFX8-NEXT: v_readlane_b32 s34, v21, 1 +; GFX8-NEXT: v_readlane_b32 s33, v21, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -730,24 +731,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v21, s30, 0 -; GFX900-NEXT: v_writelane_b32 v21, s31, 1 -; GFX900-NEXT: v_writelane_b32 v21, s33, 2 -; GFX900-NEXT: v_writelane_b32 v21, s34, 3 -; GFX900-NEXT: v_writelane_b32 
v21, s35, 4 -; GFX900-NEXT: v_writelane_b32 v21, s36, 5 -; GFX900-NEXT: v_writelane_b32 v21, s37, 6 -; GFX900-NEXT: v_writelane_b32 v21, s38, 7 -; GFX900-NEXT: v_writelane_b32 v21, s39, 8 -; GFX900-NEXT: v_writelane_b32 v21, s48, 9 -; GFX900-NEXT: v_writelane_b32 v21, s49, 10 -; GFX900-NEXT: v_writelane_b32 v21, s50, 11 -; GFX900-NEXT: v_writelane_b32 v21, s51, 12 -; GFX900-NEXT: v_writelane_b32 v21, s52, 13 -; GFX900-NEXT: v_writelane_b32 v21, s53, 14 -; GFX900-NEXT: v_writelane_b32 v21, s54, 15 +; GFX900-NEXT: v_writelane_b32 v21, s33, 0 +; GFX900-NEXT: v_writelane_b32 v21, s34, 1 +; GFX900-NEXT: v_writelane_b32 v21, s35, 2 +; GFX900-NEXT: v_writelane_b32 v21, s36, 3 +; GFX900-NEXT: v_writelane_b32 v21, s37, 4 +; GFX900-NEXT: v_writelane_b32 v21, s38, 5 +; GFX900-NEXT: v_writelane_b32 v21, s39, 6 +; GFX900-NEXT: v_writelane_b32 v21, s48, 7 +; GFX900-NEXT: v_writelane_b32 v21, s49, 8 +; GFX900-NEXT: v_writelane_b32 v21, s50, 9 +; GFX900-NEXT: v_writelane_b32 v21, s51, 10 +; GFX900-NEXT: v_writelane_b32 v21, s52, 11 +; GFX900-NEXT: v_writelane_b32 v21, s53, 12 +; GFX900-NEXT: v_writelane_b32 v21, s54, 13 +; GFX900-NEXT: v_writelane_b32 v21, s55, 14 +; GFX900-NEXT: v_writelane_b32 v21, s30, 15 +; GFX900-NEXT: v_writelane_b32 v21, s31, 16 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v21, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND @@ -758,23 +759,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v21, 16 -; GFX900-NEXT: v_readlane_b32 s54, v21, 15 -; GFX900-NEXT: v_readlane_b32 s53, v21, 14 -; GFX900-NEXT: v_readlane_b32 s52, v21, 13 -; GFX900-NEXT: v_readlane_b32 s51, v21, 12 -; GFX900-NEXT: 
v_readlane_b32 s50, v21, 11 -; GFX900-NEXT: v_readlane_b32 s49, v21, 10 -; GFX900-NEXT: v_readlane_b32 s48, v21, 9 -; GFX900-NEXT: v_readlane_b32 s39, v21, 8 -; GFX900-NEXT: v_readlane_b32 s38, v21, 7 -; GFX900-NEXT: v_readlane_b32 s37, v21, 6 -; GFX900-NEXT: v_readlane_b32 s36, v21, 5 -; GFX900-NEXT: v_readlane_b32 s35, v21, 4 -; GFX900-NEXT: v_readlane_b32 s34, v21, 3 -; GFX900-NEXT: v_readlane_b32 s33, v21, 2 -; GFX900-NEXT: v_readlane_b32 s31, v21, 1 -; GFX900-NEXT: v_readlane_b32 s30, v21, 0 +; GFX900-NEXT: v_readlane_b32 s30, v21, 15 +; GFX900-NEXT: v_readlane_b32 s31, v21, 16 +; GFX900-NEXT: v_readlane_b32 s55, v21, 14 +; GFX900-NEXT: v_readlane_b32 s54, v21, 13 +; GFX900-NEXT: v_readlane_b32 s53, v21, 12 +; GFX900-NEXT: v_readlane_b32 s52, v21, 11 +; GFX900-NEXT: v_readlane_b32 s51, v21, 10 +; GFX900-NEXT: v_readlane_b32 s50, v21, 9 +; GFX900-NEXT: v_readlane_b32 s49, v21, 8 +; GFX900-NEXT: v_readlane_b32 s48, v21, 7 +; GFX900-NEXT: v_readlane_b32 s39, v21, 6 +; GFX900-NEXT: v_readlane_b32 s38, v21, 5 +; GFX900-NEXT: v_readlane_b32 s37, v21, 4 +; GFX900-NEXT: v_readlane_b32 s36, v21, 3 +; GFX900-NEXT: v_readlane_b32 s35, v21, 2 +; GFX900-NEXT: v_readlane_b32 s34, v21, 1 +; GFX900-NEXT: v_readlane_b32 s33, v21, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -789,24 +790,25 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v21, s30, 0 -; GFX942-NEXT: v_writelane_b32 v21, s31, 1 -; GFX942-NEXT: v_writelane_b32 v21, s33, 2 -; GFX942-NEXT: v_writelane_b32 v21, s34, 3 -; GFX942-NEXT: v_writelane_b32 v21, s35, 4 -; GFX942-NEXT: v_writelane_b32 v21, s36, 5 -; GFX942-NEXT: v_writelane_b32 v21, s37, 6 -; GFX942-NEXT: v_writelane_b32 
v21, s38, 7 -; GFX942-NEXT: v_writelane_b32 v21, s39, 8 -; GFX942-NEXT: v_writelane_b32 v21, s48, 9 -; GFX942-NEXT: v_writelane_b32 v21, s49, 10 -; GFX942-NEXT: v_writelane_b32 v21, s50, 11 -; GFX942-NEXT: v_writelane_b32 v21, s51, 12 -; GFX942-NEXT: v_writelane_b32 v21, s52, 13 -; GFX942-NEXT: v_writelane_b32 v21, s53, 14 -; GFX942-NEXT: v_writelane_b32 v21, s54, 15 +; GFX942-NEXT: v_writelane_b32 v21, s33, 0 +; GFX942-NEXT: v_writelane_b32 v21, s34, 1 +; GFX942-NEXT: v_writelane_b32 v21, s35, 2 +; GFX942-NEXT: v_writelane_b32 v21, s36, 3 +; GFX942-NEXT: v_writelane_b32 v21, s37, 4 +; GFX942-NEXT: v_writelane_b32 v21, s38, 5 +; GFX942-NEXT: v_writelane_b32 v21, s39, 6 +; GFX942-NEXT: v_writelane_b32 v21, s48, 7 +; GFX942-NEXT: v_writelane_b32 v21, s49, 8 +; GFX942-NEXT: v_writelane_b32 v21, s50, 9 +; GFX942-NEXT: v_writelane_b32 v21, s51, 10 +; GFX942-NEXT: v_writelane_b32 v21, s52, 11 +; GFX942-NEXT: v_writelane_b32 v21, s53, 12 +; GFX942-NEXT: v_writelane_b32 v21, s54, 13 +; GFX942-NEXT: v_writelane_b32 v21, s55, 14 +; GFX942-NEXT: v_writelane_b32 v21, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v21, s31, 16 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec -; GFX942-NEXT: v_writelane_b32 v21, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND @@ -818,23 +820,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v21, 16 -; GFX942-NEXT: v_readlane_b32 s54, v21, 15 -; GFX942-NEXT: v_readlane_b32 s53, v21, 14 -; GFX942-NEXT: v_readlane_b32 s52, v21, 13 -; GFX942-NEXT: v_readlane_b32 s51, v21, 12 -; GFX942-NEXT: v_readlane_b32 s50, v21, 11 -; GFX942-NEXT: v_readlane_b32 s49, v21, 10 -; GFX942-NEXT: v_readlane_b32 s48, v21, 
9 -; GFX942-NEXT: v_readlane_b32 s39, v21, 8 -; GFX942-NEXT: v_readlane_b32 s38, v21, 7 -; GFX942-NEXT: v_readlane_b32 s37, v21, 6 -; GFX942-NEXT: v_readlane_b32 s36, v21, 5 -; GFX942-NEXT: v_readlane_b32 s35, v21, 4 -; GFX942-NEXT: v_readlane_b32 s34, v21, 3 -; GFX942-NEXT: v_readlane_b32 s33, v21, 2 -; GFX942-NEXT: v_readlane_b32 s31, v21, 1 -; GFX942-NEXT: v_readlane_b32 s30, v21, 0 +; GFX942-NEXT: v_readlane_b32 s30, v21, 15 +; GFX942-NEXT: v_readlane_b32 s31, v21, 16 +; GFX942-NEXT: v_readlane_b32 s55, v21, 14 +; GFX942-NEXT: v_readlane_b32 s54, v21, 13 +; GFX942-NEXT: v_readlane_b32 s53, v21, 12 +; GFX942-NEXT: v_readlane_b32 s52, v21, 11 +; GFX942-NEXT: v_readlane_b32 s51, v21, 10 +; GFX942-NEXT: v_readlane_b32 s50, v21, 9 +; GFX942-NEXT: v_readlane_b32 s49, v21, 8 +; GFX942-NEXT: v_readlane_b32 s48, v21, 7 +; GFX942-NEXT: v_readlane_b32 s39, v21, 6 +; GFX942-NEXT: v_readlane_b32 s38, v21, 5 +; GFX942-NEXT: v_readlane_b32 s37, v21, 4 +; GFX942-NEXT: v_readlane_b32 s36, v21, 3 +; GFX942-NEXT: v_readlane_b32 s35, v21, 2 +; GFX942-NEXT: v_readlane_b32 s34, v21, 1 +; GFX942-NEXT: v_readlane_b32 s33, v21, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload @@ -850,51 +852,51 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 -; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo -; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 -; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 -; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 -; 
GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 -; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16 +; GFX10_1-NEXT: v_writelane_b32 v21, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v21, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 v21, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v21, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v21, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v21, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v21, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v21, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v21, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v21, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v21, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v21, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v21, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v21, s54, 13 +; GFX10_1-NEXT: v_writelane_b32 v21, s55, 14 +; GFX10_1-NEXT: v_writelane_b32 v21, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v21, s31, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 -; GFX10_1-NEXT: 
v_readlane_b32 s51, v21, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v21, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v21, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v21, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v21, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v21, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v21, 11 +; GFX10_1-NEXT: v_readlane_b32 s51, v21, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v21, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v21, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v21, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v21, 6 +; GFX10_1-NEXT: v_readlane_b32 s38, v21, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v21, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v21, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v21, 2 +; GFX10_1-NEXT: v_readlane_b32 s34, v21, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v21, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload @@ -910,51 +912,51 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 -; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo -; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 -; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v21, 
s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16 +; GFX10_3-NEXT: v_writelane_b32 v21, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v21, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v21, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v21, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v21, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v21, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v21, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v21, s48, 7 +; GFX10_3-NEXT: v_writelane_b32 v21, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v21, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v21, s51, 10 +; GFX10_3-NEXT: v_writelane_b32 v21, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v21, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v21, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v21, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v21, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v21, s31, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: 
v_readlane_b32 s55, v21, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 -; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v21, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v21, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v21, 14 +; GFX10_3-NEXT: v_readlane_b32 s54, v21, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v21, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v21, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v21, 10 +; GFX10_3-NEXT: v_readlane_b32 s50, v21, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v21, 8 +; GFX10_3-NEXT: v_readlane_b32 s48, v21, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v21, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v21, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v21, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v21, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v21, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v21, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v21, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload @@ -969,24 +971,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v21, s30, 0 +; GFX11-NEXT: v_writelane_b32 v21, s33, 0 +; 
GFX11-NEXT: v_writelane_b32 v21, s34, 1 +; GFX11-NEXT: v_writelane_b32 v21, s35, 2 +; GFX11-NEXT: v_writelane_b32 v21, s36, 3 +; GFX11-NEXT: v_writelane_b32 v21, s37, 4 +; GFX11-NEXT: v_writelane_b32 v21, s38, 5 +; GFX11-NEXT: v_writelane_b32 v21, s39, 6 +; GFX11-NEXT: v_writelane_b32 v21, s48, 7 +; GFX11-NEXT: v_writelane_b32 v21, s49, 8 +; GFX11-NEXT: v_writelane_b32 v21, s50, 9 +; GFX11-NEXT: v_writelane_b32 v21, s51, 10 +; GFX11-NEXT: v_writelane_b32 v21, s52, 11 +; GFX11-NEXT: v_writelane_b32 v21, s53, 12 +; GFX11-NEXT: v_writelane_b32 v21, s54, 13 +; GFX11-NEXT: v_writelane_b32 v21, s55, 14 +; GFX11-NEXT: v_writelane_b32 v21, s30, 15 +; GFX11-NEXT: v_writelane_b32 v21, s31, 16 ; GFX11-NEXT: s_and_b32 s59, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v21, s31, 1 -; GFX11-NEXT: v_writelane_b32 v21, s33, 2 -; GFX11-NEXT: v_writelane_b32 v21, s34, 3 -; GFX11-NEXT: v_writelane_b32 v21, s35, 4 -; GFX11-NEXT: v_writelane_b32 v21, s36, 5 -; GFX11-NEXT: v_writelane_b32 v21, s37, 6 -; GFX11-NEXT: v_writelane_b32 v21, s38, 7 -; GFX11-NEXT: v_writelane_b32 v21, s39, 8 -; GFX11-NEXT: v_writelane_b32 v21, s48, 9 -; GFX11-NEXT: v_writelane_b32 v21, s49, 10 -; GFX11-NEXT: v_writelane_b32 v21, s50, 11 -; GFX11-NEXT: v_writelane_b32 v21, s51, 12 -; GFX11-NEXT: v_writelane_b32 v21, s52, 13 -; GFX11-NEXT: v_writelane_b32 v21, s53, 14 -; GFX11-NEXT: v_writelane_b32 v21, s54, 15 -; GFX11-NEXT: v_writelane_b32 v21, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND @@ -999,23 +1001,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s55, v21, 16 -; GFX11-NEXT: v_readlane_b32 s54, v21, 15 -; GFX11-NEXT: v_readlane_b32 s53, v21, 14 -; GFX11-NEXT: v_readlane_b32 s52, 
v21, 13 -; GFX11-NEXT: v_readlane_b32 s51, v21, 12 -; GFX11-NEXT: v_readlane_b32 s50, v21, 11 -; GFX11-NEXT: v_readlane_b32 s49, v21, 10 -; GFX11-NEXT: v_readlane_b32 s48, v21, 9 -; GFX11-NEXT: v_readlane_b32 s39, v21, 8 -; GFX11-NEXT: v_readlane_b32 s38, v21, 7 -; GFX11-NEXT: v_readlane_b32 s37, v21, 6 -; GFX11-NEXT: v_readlane_b32 s36, v21, 5 -; GFX11-NEXT: v_readlane_b32 s35, v21, 4 -; GFX11-NEXT: v_readlane_b32 s34, v21, 3 -; GFX11-NEXT: v_readlane_b32 s33, v21, 2 -; GFX11-NEXT: v_readlane_b32 s31, v21, 1 -; GFX11-NEXT: v_readlane_b32 s30, v21, 0 +; GFX11-NEXT: v_readlane_b32 s30, v21, 15 +; GFX11-NEXT: v_readlane_b32 s31, v21, 16 +; GFX11-NEXT: v_readlane_b32 s55, v21, 14 +; GFX11-NEXT: v_readlane_b32 s54, v21, 13 +; GFX11-NEXT: v_readlane_b32 s53, v21, 12 +; GFX11-NEXT: v_readlane_b32 s52, v21, 11 +; GFX11-NEXT: v_readlane_b32 s51, v21, 10 +; GFX11-NEXT: v_readlane_b32 s50, v21, 9 +; GFX11-NEXT: v_readlane_b32 s49, v21, 8 +; GFX11-NEXT: v_readlane_b32 s48, v21, 7 +; GFX11-NEXT: v_readlane_b32 s39, v21, 6 +; GFX11-NEXT: v_readlane_b32 s38, v21, 5 +; GFX11-NEXT: v_readlane_b32 s37, v21, 4 +; GFX11-NEXT: v_readlane_b32 s36, v21, 3 +; GFX11-NEXT: v_readlane_b32 s35, v21, 2 +; GFX11-NEXT: v_readlane_b32 s34, v21, 1 +; GFX11-NEXT: v_readlane_b32 s33, v21, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload @@ -1034,50 +1036,49 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v21, s30, 0 -; GFX12-NEXT: s_and_b32 s59, 0, exec_lo -; GFX12-NEXT: v_writelane_b32 v21, s31, 1 -; GFX12-NEXT: v_writelane_b32 v21, s33, 2 -; GFX12-NEXT: v_writelane_b32 v21, s34, 3 -; GFX12-NEXT: v_writelane_b32 v21, s35, 4 -; GFX12-NEXT: v_writelane_b32 v21, s36, 5 -; GFX12-NEXT: 
v_writelane_b32 v21, s37, 6 -; GFX12-NEXT: v_writelane_b32 v21, s38, 7 -; GFX12-NEXT: v_writelane_b32 v21, s39, 8 -; GFX12-NEXT: v_writelane_b32 v21, s48, 9 -; GFX12-NEXT: v_writelane_b32 v21, s49, 10 -; GFX12-NEXT: v_writelane_b32 v21, s50, 11 -; GFX12-NEXT: v_writelane_b32 v21, s51, 12 -; GFX12-NEXT: v_writelane_b32 v21, s52, 13 -; GFX12-NEXT: v_writelane_b32 v21, s53, 14 -; GFX12-NEXT: v_writelane_b32 v21, s54, 15 -; GFX12-NEXT: v_writelane_b32 v21, s55, 16 +; GFX12-NEXT: v_writelane_b32 v21, s33, 0 +; GFX12-NEXT: v_writelane_b32 v21, s34, 1 +; GFX12-NEXT: v_writelane_b32 v21, s35, 2 +; GFX12-NEXT: v_writelane_b32 v21, s36, 3 +; GFX12-NEXT: v_writelane_b32 v21, s37, 4 +; GFX12-NEXT: v_writelane_b32 v21, s38, 5 +; GFX12-NEXT: v_writelane_b32 v21, s39, 6 +; GFX12-NEXT: v_writelane_b32 v21, s48, 7 +; GFX12-NEXT: v_writelane_b32 v21, s49, 8 +; GFX12-NEXT: v_writelane_b32 v21, s50, 9 +; GFX12-NEXT: v_writelane_b32 v21, s51, 10 +; GFX12-NEXT: v_writelane_b32 v21, s52, 11 +; GFX12-NEXT: v_writelane_b32 v21, s53, 12 +; GFX12-NEXT: v_writelane_b32 v21, s54, 13 +; GFX12-NEXT: v_writelane_b32 v21, s55, 14 +; GFX12-NEXT: v_writelane_b32 v21, s30, 15 +; GFX12-NEXT: v_writelane_b32 v21, s31, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX12-NEXT: s_and_b32 s59, 0, exec_lo ; GFX12-NEXT: s_mov_b32 s54, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_readlane_b32 s55, v21, 16 -; GFX12-NEXT: v_readlane_b32 s54, v21, 15 -; GFX12-NEXT: v_readlane_b32 s53, v21, 14 -; GFX12-NEXT: v_readlane_b32 s52, v21, 13 -; GFX12-NEXT: v_readlane_b32 s51, v21, 
12 -; GFX12-NEXT: v_readlane_b32 s50, v21, 11 -; GFX12-NEXT: v_readlane_b32 s49, v21, 10 -; GFX12-NEXT: v_readlane_b32 s48, v21, 9 -; GFX12-NEXT: v_readlane_b32 s39, v21, 8 -; GFX12-NEXT: v_readlane_b32 s38, v21, 7 -; GFX12-NEXT: v_readlane_b32 s37, v21, 6 -; GFX12-NEXT: v_readlane_b32 s36, v21, 5 -; GFX12-NEXT: v_readlane_b32 s35, v21, 4 -; GFX12-NEXT: v_readlane_b32 s34, v21, 3 -; GFX12-NEXT: v_readlane_b32 s33, v21, 2 -; GFX12-NEXT: v_readlane_b32 s31, v21, 1 -; GFX12-NEXT: v_readlane_b32 s30, v21, 0 +; GFX12-NEXT: v_readlane_b32 s30, v21, 15 +; GFX12-NEXT: v_readlane_b32 s31, v21, 16 +; GFX12-NEXT: v_readlane_b32 s55, v21, 14 +; GFX12-NEXT: v_readlane_b32 s54, v21, 13 +; GFX12-NEXT: v_readlane_b32 s53, v21, 12 +; GFX12-NEXT: v_readlane_b32 s52, v21, 11 +; GFX12-NEXT: v_readlane_b32 s51, v21, 10 +; GFX12-NEXT: v_readlane_b32 s50, v21, 9 +; GFX12-NEXT: v_readlane_b32 s49, v21, 8 +; GFX12-NEXT: v_readlane_b32 s48, v21, 7 +; GFX12-NEXT: v_readlane_b32 s39, v21, 6 +; GFX12-NEXT: v_readlane_b32 s38, v21, 5 +; GFX12-NEXT: v_readlane_b32 s37, v21, 4 +; GFX12-NEXT: v_readlane_b32 s36, v21, 3 +; GFX12-NEXT: v_readlane_b32 s35, v21, 2 +; GFX12-NEXT: v_readlane_b32 s34, v21, 1 +; GFX12-NEXT: v_readlane_b32 s33, v21, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -1135,30 +1136,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_writelane_b32 v23, s28, 17 ; GFX7-NEXT: v_writelane_b32 v23, s29, 18 -; GFX7-NEXT: v_writelane_b32 v23, s30, 0 -; GFX7-NEXT: v_writelane_b32 v23, s31, 1 -; GFX7-NEXT: v_writelane_b32 v23, s33, 2 -; GFX7-NEXT: v_writelane_b32 v23, s34, 3 -; GFX7-NEXT: v_writelane_b32 v23, s35, 4 -; GFX7-NEXT: v_writelane_b32 v23, s36, 5 -; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: 
v_writelane_b32 v23, s48, 9 -; GFX7-NEXT: v_writelane_b32 v23, s49, 10 -; GFX7-NEXT: v_writelane_b32 v23, s50, 11 -; GFX7-NEXT: v_writelane_b32 v23, s51, 12 -; GFX7-NEXT: v_writelane_b32 v23, s52, 13 +; GFX7-NEXT: v_writelane_b32 v23, s33, 0 +; GFX7-NEXT: v_writelane_b32 v23, s34, 1 +; GFX7-NEXT: v_writelane_b32 v23, s35, 2 +; GFX7-NEXT: v_writelane_b32 v23, s36, 3 +; GFX7-NEXT: v_writelane_b32 v23, s37, 4 +; GFX7-NEXT: v_writelane_b32 v23, s38, 5 +; GFX7-NEXT: v_writelane_b32 v23, s39, 6 +; GFX7-NEXT: v_writelane_b32 v23, s48, 7 +; GFX7-NEXT: v_writelane_b32 v23, s49, 8 +; GFX7-NEXT: v_writelane_b32 v23, s50, 9 +; GFX7-NEXT: v_writelane_b32 v23, s51, 10 +; GFX7-NEXT: v_writelane_b32 v23, s52, 11 +; GFX7-NEXT: v_writelane_b32 v23, s53, 12 +; GFX7-NEXT: v_writelane_b32 v23, s54, 13 +; GFX7-NEXT: v_writelane_b32 v23, s55, 14 +; GFX7-NEXT: v_writelane_b32 v23, s30, 15 +; GFX7-NEXT: v_writelane_b32 v23, s31, 16 ; GFX7-NEXT: s_lshr_b32 s5, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -1169,23 +1170,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v23, 16 -; GFX7-NEXT: v_readlane_b32 s54, v23, 15 -; GFX7-NEXT: v_readlane_b32 s53, v23, 14 -; GFX7-NEXT: v_readlane_b32 s52, v23, 13 -; GFX7-NEXT: v_readlane_b32 s51, v23, 12 -; GFX7-NEXT: v_readlane_b32 s50, v23, 11 -; GFX7-NEXT: v_readlane_b32 s49, v23, 10 -; GFX7-NEXT: 
v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 -; GFX7-NEXT: v_readlane_b32 s37, v23, 6 -; GFX7-NEXT: v_readlane_b32 s36, v23, 5 -; GFX7-NEXT: v_readlane_b32 s35, v23, 4 -; GFX7-NEXT: v_readlane_b32 s34, v23, 3 -; GFX7-NEXT: v_readlane_b32 s33, v23, 2 -; GFX7-NEXT: v_readlane_b32 s31, v23, 1 -; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: v_readlane_b32 s30, v23, 15 +; GFX7-NEXT: v_readlane_b32 s31, v23, 16 +; GFX7-NEXT: v_readlane_b32 s55, v23, 14 +; GFX7-NEXT: v_readlane_b32 s54, v23, 13 +; GFX7-NEXT: v_readlane_b32 s53, v23, 12 +; GFX7-NEXT: v_readlane_b32 s52, v23, 11 +; GFX7-NEXT: v_readlane_b32 s51, v23, 10 +; GFX7-NEXT: v_readlane_b32 s50, v23, 9 +; GFX7-NEXT: v_readlane_b32 s49, v23, 8 +; GFX7-NEXT: v_readlane_b32 s48, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 6 +; GFX7-NEXT: v_readlane_b32 s38, v23, 5 +; GFX7-NEXT: v_readlane_b32 s37, v23, 4 +; GFX7-NEXT: v_readlane_b32 s36, v23, 3 +; GFX7-NEXT: v_readlane_b32 s35, v23, 2 +; GFX7-NEXT: v_readlane_b32 s34, v23, 1 +; GFX7-NEXT: v_readlane_b32 s33, v23, 0 ; GFX7-NEXT: v_readlane_b32 s28, v23, 17 ; GFX7-NEXT: v_readlane_b32 s29, v23, 18 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -1206,30 +1207,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v23, s30, 0 -; GFX8-NEXT: v_writelane_b32 v23, s31, 1 -; GFX8-NEXT: v_writelane_b32 v23, s33, 2 -; GFX8-NEXT: v_writelane_b32 v23, s34, 3 -; GFX8-NEXT: v_writelane_b32 v23, s35, 4 -; GFX8-NEXT: v_writelane_b32 v23, s36, 5 -; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s38, 7 -; GFX8-NEXT: v_writelane_b32 v23, s39, 8 -; GFX8-NEXT: v_writelane_b32 v23, s48, 9 -; GFX8-NEXT: v_writelane_b32 v23, s49, 10 -; GFX8-NEXT: v_writelane_b32 v23, s50, 11 -; 
GFX8-NEXT: v_writelane_b32 v23, s51, 12 -; GFX8-NEXT: v_writelane_b32 v23, s52, 13 +; GFX8-NEXT: v_writelane_b32 v23, s33, 0 +; GFX8-NEXT: v_writelane_b32 v23, s34, 1 +; GFX8-NEXT: v_writelane_b32 v23, s35, 2 +; GFX8-NEXT: v_writelane_b32 v23, s36, 3 +; GFX8-NEXT: v_writelane_b32 v23, s37, 4 +; GFX8-NEXT: v_writelane_b32 v23, s38, 5 +; GFX8-NEXT: v_writelane_b32 v23, s39, 6 +; GFX8-NEXT: v_writelane_b32 v23, s48, 7 +; GFX8-NEXT: v_writelane_b32 v23, s49, 8 +; GFX8-NEXT: v_writelane_b32 v23, s50, 9 +; GFX8-NEXT: v_writelane_b32 v23, s51, 10 +; GFX8-NEXT: v_writelane_b32 v23, s52, 11 +; GFX8-NEXT: v_writelane_b32 v23, s53, 12 +; GFX8-NEXT: v_writelane_b32 v23, s54, 13 +; GFX8-NEXT: v_writelane_b32 v23, s55, 14 +; GFX8-NEXT: v_writelane_b32 v23, s30, 15 +; GFX8-NEXT: v_writelane_b32 v23, s31, 16 ; GFX8-NEXT: s_lshr_b32 s5, s32, 6 -; GFX8-NEXT: v_writelane_b32 v23, s53, 14 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: v_writelane_b32 v22, s4, 0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -1241,23 +1242,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v23, 16 -; GFX8-NEXT: v_readlane_b32 s54, v23, 15 -; GFX8-NEXT: v_readlane_b32 s53, v23, 14 -; GFX8-NEXT: v_readlane_b32 s52, v23, 13 -; GFX8-NEXT: v_readlane_b32 s51, v23, 12 -; GFX8-NEXT: v_readlane_b32 s50, v23, 11 -; GFX8-NEXT: v_readlane_b32 s49, v23, 10 -; GFX8-NEXT: v_readlane_b32 s48, v23, 9 -; GFX8-NEXT: v_readlane_b32 s39, v23, 8 -; GFX8-NEXT: v_readlane_b32 s38, v23, 7 -; GFX8-NEXT: 
v_readlane_b32 s37, v23, 6 -; GFX8-NEXT: v_readlane_b32 s36, v23, 5 -; GFX8-NEXT: v_readlane_b32 s35, v23, 4 -; GFX8-NEXT: v_readlane_b32 s34, v23, 3 -; GFX8-NEXT: v_readlane_b32 s33, v23, 2 -; GFX8-NEXT: v_readlane_b32 s31, v23, 1 -; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: v_readlane_b32 s30, v23, 15 +; GFX8-NEXT: v_readlane_b32 s31, v23, 16 +; GFX8-NEXT: v_readlane_b32 s55, v23, 14 +; GFX8-NEXT: v_readlane_b32 s54, v23, 13 +; GFX8-NEXT: v_readlane_b32 s53, v23, 12 +; GFX8-NEXT: v_readlane_b32 s52, v23, 11 +; GFX8-NEXT: v_readlane_b32 s51, v23, 10 +; GFX8-NEXT: v_readlane_b32 s50, v23, 9 +; GFX8-NEXT: v_readlane_b32 s49, v23, 8 +; GFX8-NEXT: v_readlane_b32 s48, v23, 7 +; GFX8-NEXT: v_readlane_b32 s39, v23, 6 +; GFX8-NEXT: v_readlane_b32 s38, v23, 5 +; GFX8-NEXT: v_readlane_b32 s37, v23, 4 +; GFX8-NEXT: v_readlane_b32 s36, v23, 3 +; GFX8-NEXT: v_readlane_b32 s35, v23, 2 +; GFX8-NEXT: v_readlane_b32 s34, v23, 1 +; GFX8-NEXT: v_readlane_b32 s33, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1276,30 +1277,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v23, s30, 0 -; GFX900-NEXT: v_writelane_b32 v23, s31, 1 -; GFX900-NEXT: v_writelane_b32 v23, s33, 2 -; GFX900-NEXT: v_writelane_b32 v23, s34, 3 -; GFX900-NEXT: v_writelane_b32 v23, s35, 4 -; GFX900-NEXT: v_writelane_b32 v23, s36, 5 -; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s38, 7 -; GFX900-NEXT: v_writelane_b32 v23, s39, 8 -; GFX900-NEXT: v_writelane_b32 v23, s48, 9 -; GFX900-NEXT: v_writelane_b32 v23, s49, 10 -; GFX900-NEXT: v_writelane_b32 v23, s50, 11 -; GFX900-NEXT: v_writelane_b32 v23, s51, 12 -; GFX900-NEXT: 
v_writelane_b32 v23, s52, 13 +; GFX900-NEXT: v_writelane_b32 v23, s33, 0 +; GFX900-NEXT: v_writelane_b32 v23, s34, 1 +; GFX900-NEXT: v_writelane_b32 v23, s35, 2 +; GFX900-NEXT: v_writelane_b32 v23, s36, 3 +; GFX900-NEXT: v_writelane_b32 v23, s37, 4 +; GFX900-NEXT: v_writelane_b32 v23, s38, 5 +; GFX900-NEXT: v_writelane_b32 v23, s39, 6 +; GFX900-NEXT: v_writelane_b32 v23, s48, 7 +; GFX900-NEXT: v_writelane_b32 v23, s49, 8 +; GFX900-NEXT: v_writelane_b32 v23, s50, 9 +; GFX900-NEXT: v_writelane_b32 v23, s51, 10 +; GFX900-NEXT: v_writelane_b32 v23, s52, 11 +; GFX900-NEXT: v_writelane_b32 v23, s53, 12 +; GFX900-NEXT: v_writelane_b32 v23, s54, 13 +; GFX900-NEXT: v_writelane_b32 v23, s55, 14 +; GFX900-NEXT: v_writelane_b32 v23, s30, 15 +; GFX900-NEXT: v_writelane_b32 v23, s31, 16 ; GFX900-NEXT: s_lshr_b32 s5, s32, 6 -; GFX900-NEXT: v_writelane_b32 v23, s53, 14 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: v_writelane_b32 v22, s4, 0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -1311,23 +1312,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v23, 16 -; GFX900-NEXT: v_readlane_b32 s54, v23, 15 -; GFX900-NEXT: v_readlane_b32 s53, v23, 14 -; GFX900-NEXT: v_readlane_b32 s52, v23, 13 -; GFX900-NEXT: v_readlane_b32 s51, v23, 12 -; GFX900-NEXT: v_readlane_b32 s50, v23, 11 -; GFX900-NEXT: v_readlane_b32 s49, v23, 10 -; GFX900-NEXT: v_readlane_b32 s48, v23, 9 -; GFX900-NEXT: v_readlane_b32 s39, v23, 8 -; GFX900-NEXT: v_readlane_b32 
s38, v23, 7 -; GFX900-NEXT: v_readlane_b32 s37, v23, 6 -; GFX900-NEXT: v_readlane_b32 s36, v23, 5 -; GFX900-NEXT: v_readlane_b32 s35, v23, 4 -; GFX900-NEXT: v_readlane_b32 s34, v23, 3 -; GFX900-NEXT: v_readlane_b32 s33, v23, 2 -; GFX900-NEXT: v_readlane_b32 s31, v23, 1 -; GFX900-NEXT: v_readlane_b32 s30, v23, 0 +; GFX900-NEXT: v_readlane_b32 s30, v23, 15 +; GFX900-NEXT: v_readlane_b32 s31, v23, 16 +; GFX900-NEXT: v_readlane_b32 s55, v23, 14 +; GFX900-NEXT: v_readlane_b32 s54, v23, 13 +; GFX900-NEXT: v_readlane_b32 s53, v23, 12 +; GFX900-NEXT: v_readlane_b32 s52, v23, 11 +; GFX900-NEXT: v_readlane_b32 s51, v23, 10 +; GFX900-NEXT: v_readlane_b32 s50, v23, 9 +; GFX900-NEXT: v_readlane_b32 s49, v23, 8 +; GFX900-NEXT: v_readlane_b32 s48, v23, 7 +; GFX900-NEXT: v_readlane_b32 s39, v23, 6 +; GFX900-NEXT: v_readlane_b32 s38, v23, 5 +; GFX900-NEXT: v_readlane_b32 s37, v23, 4 +; GFX900-NEXT: v_readlane_b32 s36, v23, 3 +; GFX900-NEXT: v_readlane_b32 s35, v23, 2 +; GFX900-NEXT: v_readlane_b32 s34, v23, 1 +; GFX900-NEXT: v_readlane_b32 s33, v23, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1344,25 +1345,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v22, s30, 0 -; GFX942-NEXT: v_writelane_b32 v22, s31, 1 -; GFX942-NEXT: v_writelane_b32 v22, s33, 2 -; GFX942-NEXT: v_writelane_b32 v22, s34, 3 -; GFX942-NEXT: v_writelane_b32 v22, s35, 4 -; GFX942-NEXT: v_writelane_b32 v22, s36, 5 -; GFX942-NEXT: v_writelane_b32 v22, s37, 6 -; GFX942-NEXT: v_writelane_b32 v22, s38, 7 -; GFX942-NEXT: v_writelane_b32 v22, s39, 8 -; GFX942-NEXT: v_writelane_b32 v22, s48, 9 -; GFX942-NEXT: v_writelane_b32 v22, s49, 10 -; GFX942-NEXT: v_writelane_b32 v22, s50, 11 
-; GFX942-NEXT: v_writelane_b32 v22, s51, 12 -; GFX942-NEXT: v_writelane_b32 v22, s52, 13 -; GFX942-NEXT: v_writelane_b32 v22, s53, 14 +; GFX942-NEXT: v_writelane_b32 v22, s33, 0 +; GFX942-NEXT: v_writelane_b32 v22, s34, 1 +; GFX942-NEXT: v_writelane_b32 v22, s35, 2 +; GFX942-NEXT: v_writelane_b32 v22, s36, 3 +; GFX942-NEXT: v_writelane_b32 v22, s37, 4 +; GFX942-NEXT: v_writelane_b32 v22, s38, 5 +; GFX942-NEXT: v_writelane_b32 v22, s39, 6 +; GFX942-NEXT: v_writelane_b32 v22, s48, 7 +; GFX942-NEXT: v_writelane_b32 v22, s49, 8 +; GFX942-NEXT: v_writelane_b32 v22, s50, 9 +; GFX942-NEXT: v_writelane_b32 v22, s51, 10 +; GFX942-NEXT: v_writelane_b32 v22, s52, 11 +; GFX942-NEXT: v_writelane_b32 v22, s53, 12 +; GFX942-NEXT: v_writelane_b32 v22, s54, 13 +; GFX942-NEXT: v_writelane_b32 v22, s55, 14 +; GFX942-NEXT: v_writelane_b32 v22, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v22, s31, 16 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v22, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 -; GFX942-NEXT: v_writelane_b32 v22, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -1376,23 +1378,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v22, 16 -; GFX942-NEXT: v_readlane_b32 s54, v22, 15 -; GFX942-NEXT: v_readlane_b32 s53, v22, 14 -; GFX942-NEXT: v_readlane_b32 s52, v22, 13 -; GFX942-NEXT: v_readlane_b32 s51, v22, 12 -; GFX942-NEXT: v_readlane_b32 s50, v22, 11 -; GFX942-NEXT: v_readlane_b32 s49, v22, 10 -; GFX942-NEXT: v_readlane_b32 s48, v22, 9 -; GFX942-NEXT: v_readlane_b32 s39, v22, 8 -; GFX942-NEXT: v_readlane_b32 s38, v22, 7 -; GFX942-NEXT: v_readlane_b32 s37, v22, 6 -; GFX942-NEXT: v_readlane_b32 s36, v22, 5 -; GFX942-NEXT: v_readlane_b32 s35, v22, 4 -; 
GFX942-NEXT: v_readlane_b32 s34, v22, 3 -; GFX942-NEXT: v_readlane_b32 s33, v22, 2 -; GFX942-NEXT: v_readlane_b32 s31, v22, 1 -; GFX942-NEXT: v_readlane_b32 s30, v22, 0 +; GFX942-NEXT: v_readlane_b32 s30, v22, 15 +; GFX942-NEXT: v_readlane_b32 s31, v22, 16 +; GFX942-NEXT: v_readlane_b32 s55, v22, 14 +; GFX942-NEXT: v_readlane_b32 s54, v22, 13 +; GFX942-NEXT: v_readlane_b32 s53, v22, 12 +; GFX942-NEXT: v_readlane_b32 s52, v22, 11 +; GFX942-NEXT: v_readlane_b32 s51, v22, 10 +; GFX942-NEXT: v_readlane_b32 s50, v22, 9 +; GFX942-NEXT: v_readlane_b32 s49, v22, 8 +; GFX942-NEXT: v_readlane_b32 s48, v22, 7 +; GFX942-NEXT: v_readlane_b32 s39, v22, 6 +; GFX942-NEXT: v_readlane_b32 s38, v22, 5 +; GFX942-NEXT: v_readlane_b32 s37, v22, 4 +; GFX942-NEXT: v_readlane_b32 s36, v22, 3 +; GFX942-NEXT: v_readlane_b32 s35, v22, 2 +; GFX942-NEXT: v_readlane_b32 s34, v22, 1 +; GFX942-NEXT: v_readlane_b32 s33, v22, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload @@ -1408,31 +1410,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 +; GFX10_1-NEXT: v_writelane_b32 v22, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v22, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 v22, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v22, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v22, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v22, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v22, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v22, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v22, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v22, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v22, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v22, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v22, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v22, s54, 
13 +; GFX10_1-NEXT: v_writelane_b32 v22, s55, 14 +; GFX10_1-NEXT: v_writelane_b32 v22, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v22, s31, 16 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240 -; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 -; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 -; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 -; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -1441,23 +1443,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 -; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_1-NEXT: 
v_readlane_b32 s39, v22, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v22, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v22, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v22, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v22, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v22, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v22, 11 +; GFX10_1-NEXT: v_readlane_b32 s51, v22, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v22, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v22, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v22, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v22, 6 +; GFX10_1-NEXT: v_readlane_b32 s38, v22, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v22, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v22, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v22, 2 +; GFX10_1-NEXT: v_readlane_b32 s34, v22, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v22, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload @@ -1473,31 +1475,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 +; GFX10_3-NEXT: v_writelane_b32 v22, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v22, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v22, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v22, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v22, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v22, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v22, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v22, s48, 7 +; GFX10_3-NEXT: 
v_writelane_b32 v22, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v22, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v22, s51, 10 +; GFX10_3-NEXT: v_writelane_b32 v22, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v22, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v22, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v22, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v22, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v22, s31, 16 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240 -; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1 -; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -1506,23 +1508,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 -; GFX10_3-NEXT: v_readlane_b32 
s53, v22, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v22, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v22, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v22, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v22, 14 +; GFX10_3-NEXT: v_readlane_b32 s54, v22, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v22, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v22, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v22, 10 +; GFX10_3-NEXT: v_readlane_b32 s50, v22, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v22, 8 +; GFX10_3-NEXT: v_readlane_b32 s48, v22, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v22, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v22, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v22, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v22, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v22, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v22, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v22, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload @@ -1537,30 +1539,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v22, s30, 0 +; GFX11-NEXT: v_writelane_b32 v22, s33, 0 +; GFX11-NEXT: v_writelane_b32 v22, s34, 1 +; GFX11-NEXT: v_writelane_b32 v22, s35, 2 +; GFX11-NEXT: 
v_writelane_b32 v22, s36, 3 +; GFX11-NEXT: v_writelane_b32 v22, s37, 4 +; GFX11-NEXT: v_writelane_b32 v22, s38, 5 +; GFX11-NEXT: v_writelane_b32 v22, s39, 6 +; GFX11-NEXT: v_writelane_b32 v22, s48, 7 +; GFX11-NEXT: v_writelane_b32 v22, s49, 8 +; GFX11-NEXT: v_writelane_b32 v22, s50, 9 +; GFX11-NEXT: v_writelane_b32 v22, s51, 10 +; GFX11-NEXT: v_writelane_b32 v22, s52, 11 +; GFX11-NEXT: v_writelane_b32 v22, s53, 12 +; GFX11-NEXT: v_writelane_b32 v22, s54, 13 +; GFX11-NEXT: v_writelane_b32 v22, s55, 14 +; GFX11-NEXT: v_writelane_b32 v22, s30, 15 +; GFX11-NEXT: v_writelane_b32 v22, s31, 16 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_add_i32 s58, s32, 0x4240 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v22, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v22, s33, 2 -; GFX11-NEXT: v_writelane_b32 v22, s34, 3 -; GFX11-NEXT: v_writelane_b32 v22, s35, 4 -; GFX11-NEXT: v_writelane_b32 v22, s36, 5 -; GFX11-NEXT: v_writelane_b32 v22, s37, 6 -; GFX11-NEXT: v_writelane_b32 v22, s38, 7 -; GFX11-NEXT: v_writelane_b32 v22, s39, 8 -; GFX11-NEXT: v_writelane_b32 v22, s48, 9 -; GFX11-NEXT: v_writelane_b32 v22, s49, 10 -; GFX11-NEXT: v_writelane_b32 v22, s50, 11 -; GFX11-NEXT: v_writelane_b32 v22, s51, 12 -; GFX11-NEXT: v_writelane_b32 v22, s52, 13 -; GFX11-NEXT: v_writelane_b32 v22, s53, 14 -; GFX11-NEXT: v_writelane_b32 v22, s54, 15 -; GFX11-NEXT: v_writelane_b32 v22, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND @@ -1569,24 +1571,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 
s55, v22, 16 -; GFX11-NEXT: v_readlane_b32 s54, v22, 15 -; GFX11-NEXT: v_readlane_b32 s53, v22, 14 -; GFX11-NEXT: v_readlane_b32 s52, v22, 13 -; GFX11-NEXT: v_readlane_b32 s51, v22, 12 -; GFX11-NEXT: v_readlane_b32 s50, v22, 11 -; GFX11-NEXT: v_readlane_b32 s49, v22, 10 -; GFX11-NEXT: v_readlane_b32 s48, v22, 9 -; GFX11-NEXT: v_readlane_b32 s39, v22, 8 -; GFX11-NEXT: v_readlane_b32 s38, v22, 7 -; GFX11-NEXT: v_readlane_b32 s37, v22, 6 -; GFX11-NEXT: v_readlane_b32 s36, v22, 5 -; GFX11-NEXT: v_readlane_b32 s35, v22, 4 -; GFX11-NEXT: v_readlane_b32 s34, v22, 3 -; GFX11-NEXT: v_readlane_b32 s33, v22, 2 -; GFX11-NEXT: v_readlane_b32 s31, v22, 1 -; GFX11-NEXT: v_readlane_b32 s30, v22, 0 +; GFX11-NEXT: v_readlane_b32 s30, v22, 15 +; GFX11-NEXT: v_readlane_b32 s31, v22, 16 +; GFX11-NEXT: v_readlane_b32 s55, v22, 14 +; GFX11-NEXT: v_readlane_b32 s54, v22, 13 +; GFX11-NEXT: v_readlane_b32 s53, v22, 12 +; GFX11-NEXT: v_readlane_b32 s52, v22, 11 +; GFX11-NEXT: v_readlane_b32 s51, v22, 10 +; GFX11-NEXT: v_readlane_b32 s50, v22, 9 +; GFX11-NEXT: v_readlane_b32 s49, v22, 8 +; GFX11-NEXT: v_readlane_b32 s48, v22, 7 +; GFX11-NEXT: v_readlane_b32 s39, v22, 6 +; GFX11-NEXT: v_readlane_b32 s38, v22, 5 +; GFX11-NEXT: v_readlane_b32 s37, v22, 4 +; GFX11-NEXT: v_readlane_b32 s36, v22, 3 +; GFX11-NEXT: v_readlane_b32 s35, v22, 2 +; GFX11-NEXT: v_readlane_b32 s34, v22, 1 +; GFX11-NEXT: v_readlane_b32 s33, v22, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload @@ -1605,29 +1606,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v22, s30, 0 +; GFX12-NEXT: v_writelane_b32 v22, s33, 0 +; GFX12-NEXT: v_writelane_b32 v22, s34, 1 +; GFX12-NEXT: v_writelane_b32 v22, s35, 2 +; GFX12-NEXT: 
v_writelane_b32 v22, s36, 3 +; GFX12-NEXT: v_writelane_b32 v22, s37, 4 +; GFX12-NEXT: v_writelane_b32 v22, s38, 5 +; GFX12-NEXT: v_writelane_b32 v22, s39, 6 +; GFX12-NEXT: v_writelane_b32 v22, s48, 7 +; GFX12-NEXT: v_writelane_b32 v22, s49, 8 +; GFX12-NEXT: v_writelane_b32 v22, s50, 9 +; GFX12-NEXT: v_writelane_b32 v22, s51, 10 +; GFX12-NEXT: v_writelane_b32 v22, s52, 11 +; GFX12-NEXT: v_writelane_b32 v22, s53, 12 +; GFX12-NEXT: v_writelane_b32 v22, s54, 13 +; GFX12-NEXT: v_writelane_b32 v22, s55, 14 +; GFX12-NEXT: v_writelane_b32 v22, s30, 15 +; GFX12-NEXT: v_writelane_b32 v22, s31, 16 ; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v22, s31, 1 -; GFX12-NEXT: v_writelane_b32 v22, s33, 2 -; GFX12-NEXT: v_writelane_b32 v22, s34, 3 -; GFX12-NEXT: v_writelane_b32 v22, s35, 4 -; GFX12-NEXT: v_writelane_b32 v22, s36, 5 -; GFX12-NEXT: v_writelane_b32 v22, s37, 6 -; GFX12-NEXT: v_writelane_b32 v22, s38, 7 -; GFX12-NEXT: v_writelane_b32 v22, s39, 8 -; GFX12-NEXT: v_writelane_b32 v22, s48, 9 -; GFX12-NEXT: v_writelane_b32 v22, s49, 10 -; GFX12-NEXT: v_writelane_b32 v22, s50, 11 -; GFX12-NEXT: v_writelane_b32 v22, s51, 12 -; GFX12-NEXT: v_writelane_b32 v22, s52, 13 -; GFX12-NEXT: v_writelane_b32 v22, s53, 14 -; GFX12-NEXT: v_writelane_b32 v22, s54, 15 -; GFX12-NEXT: v_writelane_b32 v22, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND @@ -1637,23 +1638,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s55, v22, 16 -; GFX12-NEXT: v_readlane_b32 s54, v22, 15 -; GFX12-NEXT: 
v_readlane_b32 s53, v22, 14 -; GFX12-NEXT: v_readlane_b32 s52, v22, 13 -; GFX12-NEXT: v_readlane_b32 s51, v22, 12 -; GFX12-NEXT: v_readlane_b32 s50, v22, 11 -; GFX12-NEXT: v_readlane_b32 s49, v22, 10 -; GFX12-NEXT: v_readlane_b32 s48, v22, 9 -; GFX12-NEXT: v_readlane_b32 s39, v22, 8 -; GFX12-NEXT: v_readlane_b32 s38, v22, 7 -; GFX12-NEXT: v_readlane_b32 s37, v22, 6 -; GFX12-NEXT: v_readlane_b32 s36, v22, 5 -; GFX12-NEXT: v_readlane_b32 s35, v22, 4 -; GFX12-NEXT: v_readlane_b32 s34, v22, 3 -; GFX12-NEXT: v_readlane_b32 s33, v22, 2 -; GFX12-NEXT: v_readlane_b32 s31, v22, 1 -; GFX12-NEXT: v_readlane_b32 s30, v22, 0 +; GFX12-NEXT: v_readlane_b32 s30, v22, 15 +; GFX12-NEXT: v_readlane_b32 s31, v22, 16 +; GFX12-NEXT: v_readlane_b32 s55, v22, 14 +; GFX12-NEXT: v_readlane_b32 s54, v22, 13 +; GFX12-NEXT: v_readlane_b32 s53, v22, 12 +; GFX12-NEXT: v_readlane_b32 s52, v22, 11 +; GFX12-NEXT: v_readlane_b32 s51, v22, 10 +; GFX12-NEXT: v_readlane_b32 s50, v22, 9 +; GFX12-NEXT: v_readlane_b32 s49, v22, 8 +; GFX12-NEXT: v_readlane_b32 s48, v22, 7 +; GFX12-NEXT: v_readlane_b32 s39, v22, 6 +; GFX12-NEXT: v_readlane_b32 s38, v22, 5 +; GFX12-NEXT: v_readlane_b32 s37, v22, 4 +; GFX12-NEXT: v_readlane_b32 s36, v22, 3 +; GFX12-NEXT: v_readlane_b32 s35, v22, 2 +; GFX12-NEXT: v_readlane_b32 s34, v22, 1 +; GFX12-NEXT: v_readlane_b32 s33, v22, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll index ca16e251d51cf..e84d3c913328c 100644 --- a/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll @@ -8941,6 +8941,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-LABEL: v_maximumnum_v32bf16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], 
s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX8-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -8989,13 +8992,10 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX8-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX8-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX8-NEXT: s_waitcnt vmcnt(3) +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX8-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -9563,6 +9563,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-LABEL: v_maximumnum_v32bf16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX900-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9611,14 +9614,11 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 
; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX900-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX900-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100 -; GFX900-NEXT: s_waitcnt vmcnt(3) +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX900-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -10170,6 +10170,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-LABEL: v_maximumnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v50, off, s32 ; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX950-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -10210,21 +10213,18 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v19 ; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v34 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 +; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v31, v34, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v38 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 +; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 +; GFX950-NEXT: 
s_mov_b32 s0, 0x5040100 ; GFX950-NEXT: v_cndmask_b32_e32 v34, v35, v38, vcc ; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39 ; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 ; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v26 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc ; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v32, v32 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX950-NEXT: s_mov_b32 s0, 0x5040100 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_lshrrev_b32_e32 v35, 16, v50 ; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v50 diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index 02f39e25cb447..06213ef3e06ea 100644 --- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -81,7 +81,6 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) ; ALIGNED-LABEL: memcpy_p0_p0_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -90,6 +89,7 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) ; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB0_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 ; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 @@ -837,7 +837,6 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 
%dst, ptr addrspace(1) ; ALIGNED-LABEL: memcpy_p1_p1_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -846,6 +845,7 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) ; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB1_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 ; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 @@ -2340,7 +2340,6 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) ; ALIGNED-LABEL: memcpy_p5_p5_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -2389,6 +2388,7 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) ; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB3_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop 
Header: Depth=1 ; ALIGNED-NEXT: s_clause 0x34 diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll index 416a601797617..8184e1927146d 100644 --- a/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll @@ -8980,6 +8980,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-LABEL: v_minimumnum_v32bf16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX8-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9029,13 +9032,10 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX8-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX8-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX8-NEXT: s_waitcnt vmcnt(3) +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX8-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -9603,6 +9603,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-LABEL: v_minimumnum_v32bf16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX900-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9652,13 +9655,10 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX900-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX900-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX900-NEXT: s_waitcnt vmcnt(3) +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX900-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -10211,6 +10211,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-LABEL: v_minimumnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v50, off, s32 ; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX950-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -10252,20 +10255,17 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc ; GFX950-NEXT: 
v_cmp_eq_u16_e32 vcc, s0, v34 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v19 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v31, v34, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v38 -; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 +; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX950-NEXT: v_cndmask_b32_e32 v34, v35, v38, vcc ; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39 ; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 ; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v26 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc ; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v32, v32 -; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_lshrrev_b32_e32 v35, 16, v50 ; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v50 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 33cd598aae9b5..486a08d6ee8cd 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -194,19 +194,19 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: v_writelane_b32 v43, s4, 5 -; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_writelane_b32 v43, s31, 1 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v43, s34, 2 -; GFX9-NEXT: v_writelane_b32 v43, s36, 3 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, 
s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v43, s34, 0 +; GFX9-NEXT: v_writelane_b32 v43, s36, 1 +; GFX9-NEXT: v_writelane_b32 v43, s37, 2 +; GFX9-NEXT: v_writelane_b32 v43, s30, 3 +; GFX9-NEXT: v_writelane_b32 v43, s31, 4 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v43, s37, 4 ; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, v1 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40 @@ -224,11 +224,11 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s37, v43, 4 -; GFX9-NEXT: v_readlane_b32 s36, v43, 3 -; GFX9-NEXT: v_readlane_b32 s34, v43, 2 -; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 +; GFX9-NEXT: v_readlane_b32 s30, v43, 3 +; GFX9-NEXT: v_readlane_b32 s31, v43, 4 +; GFX9-NEXT: v_readlane_b32 s37, v43, 2 +; GFX9-NEXT: v_readlane_b32 s36, v43, 1 +; GFX9-NEXT: v_readlane_b32 s34, v43, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v43, 5 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 65446a036c91b..878302e4865bb 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -47,8 +47,8 @@ define internal fastcc 
void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -190,8 +190,8 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -224,8 +224,8 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: v_readlane_b32 s30, v2, 0 +; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 94e997cf49ddb..da9463b1329c7 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -19,18 +19,18 @@ define void @test_func_call_external_void_func_i32_imm() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, 
s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v0, 42 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -53,23 +53,23 @@ define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x1400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 42 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index 7155c8e085470..da30190663457 100644 --- 
a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -233,6 +233,7 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: .cfi_llvm_vector_registers 16, 2623, 0, 32, 2623, 1, 32 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 ; CHECK-NEXT: s_getpc_b64 s[16:17] @@ -247,8 +248,8 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir index c61241c65b326..0822067b6a12c 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -145,6 +145,7 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr10, 32, $exec_lo, 32, 0 ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir 
b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir index 023bcc563cdcd..ff8418e5b2f60 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir @@ -46,6 +46,22 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W32-NEXT: $m0 = S_MOV_B32 9 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 ; W32-NEXT: $m0 = S_MOV_B32 9 ; W32-NEXT: 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) @@ -74,6 +90,22 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W64-NEXT: $m0 = S_MOV_B32 9 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 ; W64-NEXT: $m0 = S_MOV_B32 9 ; W64-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) @@ -116,6 +148,22 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W32-NEXT: $m0 = S_MOV_B32 16711935 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 160 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$vgpr46, 32, $exec_lo, 32, 192 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 224 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 512 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 544 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 576 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 608 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 640 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 672 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 704 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 736 ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 ; W32-NEXT: $m0 = S_MOV_B32 16711935 ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) @@ -144,6 +192,22 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W64-NEXT: $m0 = 
S_MOV_B32 16711935 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 128 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 256 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 320 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 384 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 448 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1024 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1088 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1152 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1216 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 1280 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 1344 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 1408 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 1472 ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def 
$vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 ; W64-NEXT: $m0 = S_MOV_B32 16711935 ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) @@ -214,10 +278,58 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 ; W32-NEXT: $m0 = S_MOV_B32 3 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1024 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1056 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W32-NEXT: $m0 = S_MOV_B32 65 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr104, 32, $exec_lo, 32, 128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr105 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr106 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr107 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr108 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr109 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr110, 32, $exec_lo, 32, 320 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr111 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr120 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr121 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr122 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr123 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr124 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr125 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr126 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr127 ; W32-NEXT: $m0 = S_MOV_B32 1 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr232, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr233 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr234 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr235 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr236 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr237 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr238 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr239 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr248 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr249 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr250 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr251 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr252 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr253 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr254 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr255 ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 ; W32-NEXT: $m0 = S_MOV_B32 1 ; W32-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, 
implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) @@ -274,10 +386,58 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 ; W64-NEXT: $m0 = S_MOV_B32 3 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 2048 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 2112 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W64-NEXT: $m0 = S_MOV_B32 65 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr104, 32, $exec, 64, 256 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr105 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr106 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr107 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr108 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr109 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr110, 32, $exec, 64, 640 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr111 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr120 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr121 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr122 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr123 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr124 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr125 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr126 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr127 ; W64-NEXT: $m0 = S_MOV_B32 1 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr232, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION 
same_value $vgpr233 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr234 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr235 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr236 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr237 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr238 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr239 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr248 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr249 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr250 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr251 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr252 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr253 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr254 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr255 ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 ; W64-NEXT: $m0 = S_MOV_B32 1 ; W64-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) @@ -342,8 +502,40 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 ; W32-NEXT: $m0 = S_MOV_B32 7 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 256 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 288 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 320 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W32-NEXT: $m0 = S_MOV_B32 3 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr72, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr73, 32, 
$exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr74 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr75 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr76 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr77 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr78 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr79 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr88 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr89 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr90 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr91 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr92 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr93 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr94 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr95 ; W32-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 ; W32-NEXT: $m0 = S_MOV_B32 3 ; W32-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) @@ -390,8 +582,40 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 ; W64-NEXT: $m0 = S_MOV_B32 7 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 512 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 576 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 640 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W64-NEXT: $m0 = S_MOV_B32 3 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr72, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr73, 32, $exec, 64, 64 + 
; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr74 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr75 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr76 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr77 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr78 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr79 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr88 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr89 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr90 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr91 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr92 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr93 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr94 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr95 ; W64-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 ; W64-NEXT: $m0 = S_MOV_B32 3 ; W64-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) @@ -444,6 +668,22 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W32-NEXT: $m0 = S_MOV_B32 1 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; W32-NEXT: S_NOP 0, implicit-def $vgpr40 @@ -474,6 +714,22 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W64-NEXT: $m0 = S_MOV_B32 1 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; W64-NEXT: S_NOP 0, implicit-def $vgpr40 @@ -535,6 +791,22 @@ body: | ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; W32-NEXT: $m0 = S_MOV_B32 9 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec ; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 @@ -580,6 +852,22 @@ body: | ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; W64-NEXT: $m0 = S_MOV_B32 9 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION same_value $vgpr41 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec ; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 @@ -664,6 +952,22 @@ body: | ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W32-NEXT: $m0 = S_MOV_B32 11 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 
5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 ; W32-NEXT: S_BRANCH %bb.1 ; W32-NEXT: {{ $}} @@ -706,6 +1010,22 @@ body: | ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 ; W64-NEXT: $m0 = S_MOV_B32 11 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 
64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 ; W64-NEXT: S_BRANCH %bb.1 ; W64-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll index 29e34a0454d16..4b03896043dbb 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll @@ -20,6 +20,10 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: v_writelane_b32 v41, s16, 4 ; GFX906-NEXT: v_writelane_b32 v41, s34, 2 ; GFX906-NEXT: v_writelane_b32 v41, s35, 3 +; GFX906-NEXT: s_addk_i32 s32, 0x2800 +; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX906-NEXT: v_writelane_b32 v41, s30, 0 +; GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: s_mov_b32 s21, s15 ; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX906-NEXT: s_mov_b32 s22, s14 @@ -36,11 +40,7 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: v_writelane_b32 v39, s9, 7 ; GFX906-NEXT: v_writelane_b32 v39, s6, 8 ; GFX906-NEXT: v_writelane_b32 v39, s7, 9 -; GFX906-NEXT: v_writelane_b32 v41, s30, 0 ; GFX906-NEXT: v_writelane_b32 
v39, s4, 10 -; GFX906-NEXT: s_addk_i32 s32, 0x2800 -; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: v_mov_b32_e32 v32, v31 ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX906-NEXT: s_nop 0 @@ -338,8 +338,8 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; GFX906-NEXT: v_readlane_b32 s30, v41, 0 +; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; GFX906-NEXT: s_mov_b32 s32, s33 ; GFX906-NEXT: v_readlane_b32 s4, v41, 4 ; GFX906-NEXT: v_readlane_b32 s34, v41, 2 @@ -388,21 +388,14 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: s_addk_i32 s32, 0x2c00 ; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 s[16:17], exec -; GFX908-NEXT: s_mov_b64 exec, 1 +; GFX908-NEXT: s_mov_b64 exec, 3 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: v_writelane_b32 v2, s30, 0 +; GFX908-NEXT: v_writelane_b32 v2, s31, 1 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] -; GFX908-NEXT: s_mov_b64 s[16:17], exec -; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 -; GFX908-NEXT: v_writelane_b32 v2, s31, 0 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 -; GFX908-NEXT: s_waitcnt 
vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, s[16:17] ; GFX908-NEXT: s_mov_b32 s21, s15 ; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX908-NEXT: s_mov_b32 s22, s14 @@ -735,20 +728,12 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_readlane_b32 s31, v0, 0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 -; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, s[4:5] -; GFX908-NEXT: s_mov_b64 s[4:5], exec -; GFX908-NEXT: s_mov_b64 exec, 1 +; GFX908-NEXT: s_mov_b64 exec, 3 ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s30, v0, 0 +; GFX908-NEXT: v_readlane_b32 s31, v0, 1 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll index dba10f19eb500..1260e147fbc53 100644 --- a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll +++ b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll @@ -11,8 +11,8 @@ define void @test_remat_s_getpc_b64() { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: ;;#ASMSTART @@ -20,9 +20,9 @@ define void 
@test_remat_s_getpc_b64() { ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -36,17 +36,16 @@ define void @test_remat_s_getpc_b64() { ; GFX11-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload @@ -66,21 +65,21 @@ define void @test_remat_s_getpc_b64() { ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v2, s30, 0 +; GFX12-NEXT: v_writelane_b32 v2, s31, 1 ; GFX12-NEXT: s_getpc_b64 s[0:1] ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_sext_i32_i16 s1, s1 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v2, s31, 1 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_getpc_b64 s[0:1] +; GFX12-NEXT: v_readlane_b32 s30, v2, 0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_sext_i32_i16 s1, s1 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: v_dual_mov_b32 v0, 
s0 :: v_dual_mov_b32 v1, s1 ; GFX12-NEXT: v_readlane_b32 s31, v2, 1 -; GFX12-NEXT: v_readlane_b32 s30, v2, 0 ; GFX12-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index 9e61fa0e681cc..b71a9eeef208e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -28,7 +28,7 @@ body: | ; GCN-LABEL: name: test_main ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0 + ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ 
$}} ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 @@ -77,87 +77,154 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr1, 0 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 128 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr3, 256 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr4, 384 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.73, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr5, 512 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr4, $vgpr255, 0, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr5, $vgpr255, 1, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr6, $vgpr255, 2, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr7, $vgpr255, 3, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr8, $vgpr255, 4, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr9, $vgpr255, 5, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr10, $vgpr255, 6, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr11, $vgpr255, 7, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr12, $vgpr255, 8, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr13, $vgpr255, 9, 32 ; GCN-NEXT: $vgpr2 = 
SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr14, $vgpr255, 10, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr15, $vgpr255, 11, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr16, $vgpr255, 12, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr17, $vgpr255, 13, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr18, $vgpr255, 14, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr19, $vgpr255, 15, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr20, $vgpr255, 16, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr21, $vgpr255, 17, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr22, $vgpr255, 18, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr23, $vgpr255, 19, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr24, $vgpr255, 20, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr25, $vgpr255, 21, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr26, $vgpr255, 22, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR 
$sgpr27, 23, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr27, $vgpr255, 23, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr28, $vgpr255, 24, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr2 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr3 - ; GCN-NEXT: $vgpr3 
= SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr3 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr29, $vgpr255, 25, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 26, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr64, $vgpr255, 26, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 27, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr65, $vgpr255, 27, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 28, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr66, $vgpr255, 28, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 29, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr67, $vgpr255, 29, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 30, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr68, $vgpr255, 30, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 31, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr69, 
$vgpr255, 31, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 0, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr70, $vgpr254, 0, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 1, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr71, $vgpr254, 1, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 2, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr72, $vgpr254, 2, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 3, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr73, $vgpr254, 3, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 4, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr74, $vgpr254, 4, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 5, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr75, $vgpr254, 5, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 6, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr76, $vgpr254, 6, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 7, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr77, $vgpr254, 7, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 8, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr78, $vgpr254, 8, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 9, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr79, $vgpr254, 9, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 10, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr80, $vgpr254, 10, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 11, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr81, $vgpr254, 11, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 12, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr82, $vgpr254, 12, 32 
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 13, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr83, $vgpr254, 13, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 14, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr84, $vgpr254, 14, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 15, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr85, $vgpr254, 15, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 16, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr86, $vgpr254, 16, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 17, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr87, $vgpr254, 17, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 18, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr88, $vgpr254, 18, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 19, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr89, $vgpr254, 19, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 20, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr90, $vgpr254, 20, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 21, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr91, $vgpr254, 21, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 22, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr92, $vgpr254, 22, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 23, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr93, $vgpr254, 23, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 24, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr94, $vgpr254, 24, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 25, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr95, $vgpr254, 25, 
32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 26, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr96, $vgpr254, 26, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 27, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr97, $vgpr254, 27, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 28, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr98, $vgpr254, 28, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 29, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr99, $vgpr254, 29, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 30, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr100, $vgpr254, 30, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 31, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr101, $vgpr254, 31, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 0, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr102, $vgpr253, 0, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 1, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr103, $vgpr253, 1, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr30, 2, $vgpr4, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr31, 3, $vgpr4, implicit $sgpr30_sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr253, 2, 32, $vgpr253, 3, 32 ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF ; GCN-NEXT: $vgpr5 = IMPLICIT_DEF ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5 @@ -180,48 +247,48 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: liveins: $vcc_hi ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 - ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2 - ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 - ; GCN-NEXT: $sgpr100 = 
SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 - ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 - ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 - ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 - ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 - ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 - ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26 - ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 - ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 - ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 - ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 - ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 - ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 - ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 - ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18 - ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 - ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 - ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 - ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 - ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 - ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 - ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 - ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 - ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 - ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 - ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 - ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 - ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 - ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 - ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 - ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 - ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 - ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 - ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR 
$vgpr2, 31 - ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30 - ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29 - ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28 - ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27 - ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26 + ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2, implicit-def $sgpr30_sgpr31 + ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 + ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 + ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 + ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 + ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 + ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 + ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 + ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 + ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26 + ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 + ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 + ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 + ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 + ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 + ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 + ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 + ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18 + ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 + ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 + ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 + ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 + ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 + ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 + ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 + ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 + ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 + ; GCN-NEXT: $sgpr78 = 
SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 + ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 + ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 + ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 + ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 + ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 + ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 + ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 + ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 + ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31 + ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30 + ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29 + ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28 + ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27 + ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26 ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25 ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24 ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23 @@ -250,11 +317,11 @@ body: | ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 ; GCN-NEXT: $sgpr32 = frame-destroy COPY $sgpr33 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) - ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) - ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) - ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) - ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5) + ; 
GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5) + ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) + ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) + ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) + ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index 702953c56a5cb..cb54b0ba629c3 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -152,8 +152,8 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v255, 1 ; GCN-NEXT: v_readlane_b32 s30, v255, 0 +; GCN-NEXT: v_readlane_b32 s31, v255, 1 ; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -445,8 +445,8 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v254, 1 
; GCN-NEXT: v_readlane_b32 s30, v254, 0 +; GCN-NEXT: v_readlane_b32 s31, v254, 1 ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -1632,21 +1632,14 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 1 +; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN-NEXT: v_writelane_b32 v0, s31, 1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: v_writelane_b32 v0, s31, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra@rel32@hi+12 @@ -1656,20 +1649,12 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: 
v_readlane_b32 s31, v0, 0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 1 +; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s30, v0, 0 +; GCN-NEXT: v_readlane_b32 s31, v0, 1 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index 7ee7c83e0122d..7feef49839ed5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -14689,22 +14689,22 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s30 ; GFX900-NEXT: s_mov_b32 s9, s31 +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14718,22 +14718,22 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX90A-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s30 ; GFX90A-NEXT: s_mov_b32 s9, s31 +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -14829,22 +14829,22 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s30 ; GFX900-NEXT: s_mov_b32 s9, s31 +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14858,22 +14858,22 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 
4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s30 ; GFX90A-NEXT: s_mov_b32 s9, s31 +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -14887,22 +14887,23 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s30 ; GFX942-NEXT: s_mov_b32 s9, s31 -; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -14923,22 +14924,22 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 
4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s30 ; GFX900-NEXT: s_mov_b32 s13, s31 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14952,22 +14953,22 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s30 ; GFX90A-NEXT: s_mov_b32 s13, s31 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -15087,6 +15088,7 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded 
Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -15096,13 +15098,13 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s30 ; GFX942-NEXT: s_mov_b32 s9, s31 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -15129,10 +15131,10 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -15170,10 +15172,10 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -15205,22 +15207,23 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s30 ; GFX942-NEXT: s_mov_b32 s13, s31 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -16255,6 +16258,7 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -16263,12 +16267,12 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s17 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -16978,6 +16982,7 @@ define void @s_shuffle_v2i64_v8i64__12_1() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -16986,12 +16991,12 @@ define void 
@s_shuffle_v2i64_v8i64__12_1() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s19 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -17562,13 +17567,14 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17577,7 +17583,6 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17591,13 +17596,14 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: 
v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17606,7 +17612,6 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17653,13 +17658,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s20 ; GFX900-NEXT: s_mov_b32 s11, s21 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17680,13 +17685,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s20 ; GFX90A-NEXT: s_mov_b32 s11, s21 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17700,6 +17705,7 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[16:31] @@ -17708,13 +17714,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s20 ; GFX942-NEXT: s_mov_b32 s11, s21 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -17735,13 +17741,14 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17750,7 +17757,6 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17764,13 +17770,14 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; 
GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17779,7 +17786,6 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17879,13 +17885,14 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17894,7 +17901,6 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17908,13 +17914,14 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17923,7 +17930,6 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -18403,13 +18409,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s22 ; GFX900-NEXT: s_mov_b32 s11, s23 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -18430,13 +18436,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s22 ; GFX90A-NEXT: s_mov_b32 s11, s23 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -18450,6 +18456,7 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -18458,13 +18465,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s22 ; GFX942-NEXT: s_mov_b32 s11, s23 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19031,13 +19038,14 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19046,7 +19054,6 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19060,13 +19067,14 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 
v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19075,7 +19083,6 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19089,22 +19096,23 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19181,13 +19189,14 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: 
v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19196,7 +19205,6 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19210,13 +19218,14 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19225,7 +19234,6 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19239,22 +19247,23 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19275,22 +19284,22 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s26, s12 ; GFX900-NEXT: s_mov_b32 s27, s13 ; GFX900-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19304,22 +19313,22 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; 
GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s26, s12 ; GFX90A-NEXT: s_mov_b32 s27, s13 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19357,13 +19366,14 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19372,7 +19382,6 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19386,13 +19395,14 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: 
v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19401,7 +19411,6 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19415,22 +19424,23 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s27 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19451,10 +19461,10 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; 
GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -19462,11 +19472,11 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX900-NEXT: s_mov_b32 s31, s13 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19480,10 +19490,10 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -19491,11 +19501,11 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX90A-NEXT: s_mov_b32 s31, s13 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19952,22 +19962,22 @@ define void @s_shuffle_v2i64_v8i64__9_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: 
;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s18 ; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19981,22 +19991,22 @@ define void @s_shuffle_v2i64_v8i64__9_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s18 ; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20090,22 +20100,22 @@ define void @s_shuffle_v2i64_v8i64__11_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: 
;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s22 ; GFX900-NEXT: s_mov_b32 s13, s23 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20119,22 +20129,22 @@ define void @s_shuffle_v2i64_v8i64__11_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s22 ; GFX90A-NEXT: s_mov_b32 s13, s23 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20172,22 +20182,22 @@ define void @s_shuffle_v2i64_v8i64__12_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: 
;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s26, s14 ; GFX900-NEXT: s_mov_b32 s27, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20201,22 +20211,22 @@ define void @s_shuffle_v2i64_v8i64__12_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s26, s14 ; GFX90A-NEXT: s_mov_b32 s27, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20254,22 +20264,22 @@ define void @s_shuffle_v2i64_v8i64__13_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: 
;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s26 ; GFX900-NEXT: s_mov_b32 s13, s27 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20283,22 +20293,22 @@ define void @s_shuffle_v2i64_v8i64__13_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s26 ; GFX90A-NEXT: s_mov_b32 s13, s27 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20336,10 +20346,10 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: 
;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -20347,11 +20357,11 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX900-NEXT: s_mov_b32 s31, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20365,10 +20375,10 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -20376,11 +20386,11 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX90A-NEXT: s_mov_b32 s31, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20934,14 +20944,16 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 
0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -20949,7 +20961,6 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21108,14 +21119,16 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s23 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -21123,7 +21136,6 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21332,14 +21344,16 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 
4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s27 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -21347,7 +21361,6 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21450,6 +21463,7 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -21461,11 +21475,11 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX942-NEXT: s_mov_b32 s31, s13 ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21918,10 +21932,10 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: 
v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -21959,10 +21973,10 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -21994,22 +22008,23 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s18 ; GFX942-NEXT: s_mov_b32 s13, s19 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -22092,10 +22107,10 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; 
GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22133,10 +22148,10 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22168,22 +22183,23 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s22 ; GFX942-NEXT: s_mov_b32 s13, s23 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -22316,10 +22332,10 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22357,10 +22373,10 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22392,22 +22408,23 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s26 ; GFX942-NEXT: s_mov_b32 s13, s27 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -22510,6 +22527,7 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -22521,11 +22539,11 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX942-NEXT: s_mov_b32 s31, s15 ; GFX942-NEXT: 
s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -23512,22 +23530,22 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s18 ; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -23541,22 +23559,22 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s18 ; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: s_mov_b64 
s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -23601,13 +23619,13 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -23628,13 +23646,13 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -23648,6 +23666,7 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -23656,13 +23675,13 @@ define void 
@s_shuffle_v2i64_v8i64__5_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s8, s26 ; GFX942-NEXT: s_mov_b32 s9, s27 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -23689,10 +23708,10 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -23730,10 +23749,10 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -23765,22 +23784,23 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s18 ; GFX942-NEXT: s_mov_b32 s15, s19 ; 
GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -24362,22 +24382,22 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s20 ; GFX900-NEXT: s_mov_b32 s15, s21 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -24391,22 +24411,22 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s20 ; GFX90A-NEXT: s_mov_b32 s15, s21 ; GFX90A-NEXT: 
s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -24444,13 +24464,14 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s20 @@ -24459,7 +24480,6 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -24473,13 +24493,14 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 
s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s20 @@ -24488,7 +24509,6 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -24533,10 +24553,10 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -24574,10 +24594,10 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -24609,22 +24629,23 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s20 ; GFX942-NEXT: s_mov_b32 s15, s21 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -24727,14 +24748,16 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s20 ; GFX942-NEXT: s_mov_b32 s11, s21 @@ -24742,7 +24765,6 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -25323,13 +25345,13 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -25350,13 +25372,13 @@ define void 
@s_shuffle_v2i64_v8i64__3_11() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -25370,6 +25392,7 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -25378,13 +25401,13 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s8, s22 ; GFX942-NEXT: s_mov_b32 s9, s23 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -25405,22 +25428,22 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s22 ; GFX900-NEXT: 
s_mov_b32 s15, s23 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -25434,22 +25457,22 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s22 ; GFX90A-NEXT: s_mov_b32 s15, s23 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -25549,10 +25572,10 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -25590,10 +25613,10 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: 
v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -25625,22 +25648,23 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s22 ; GFX942-NEXT: s_mov_b32 s15, s23 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26222,22 +26246,22 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s24 ; GFX900-NEXT: s_mov_b32 s15, s25 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; 
GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -26251,22 +26275,22 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s24 ; GFX90A-NEXT: s_mov_b32 s15, s25 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -26304,13 +26328,14 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s24 @@ -26319,7 
+26344,6 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -26333,13 +26357,14 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s24 @@ -26348,7 +26373,6 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -26393,10 +26417,10 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -26434,10 +26458,10 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: 
v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -26469,22 +26493,23 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s24 ; GFX942-NEXT: s_mov_b32 s15, s25 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26587,14 +26612,16 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s24 ; GFX942-NEXT: s_mov_b32 s11, s25 @@ -26602,7 +26629,6 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; 
GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26968,6 +26994,7 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -26976,12 +27003,12 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -27118,22 +27145,22 @@ define void @s_shuffle_v2i64_v8i64__4_13() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s26 ; GFX900-NEXT: s_mov_b32 s15, s27 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: 
v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -27147,22 +27174,22 @@ define void @s_shuffle_v2i64_v8i64__4_13() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s26 ; GFX90A-NEXT: s_mov_b32 s15, s27 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -27200,22 +27227,22 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s24, s14 ; GFX900-NEXT: s_mov_b32 s25, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: 
v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -27229,22 +27256,22 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s24, s14 ; GFX90A-NEXT: s_mov_b32 s25, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -27288,10 +27315,10 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -27329,10 +27356,10 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; 
GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -27364,22 +27391,23 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s26 ; GFX942-NEXT: s_mov_b32 s15, s27 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -28075,22 +28103,22 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s28 ; GFX900-NEXT: s_mov_b32 s15, s29 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -28104,22 +28132,22 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s28 ; GFX90A-NEXT: s_mov_b32 s15, s29 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -28157,13 +28185,14 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s28 @@ -28172,7 +28201,6 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword 
v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -28186,13 +28214,14 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s28 @@ -28201,7 +28230,6 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -28246,10 +28274,10 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -28287,10 +28315,10 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -28322,22 +28350,23 @@ define void 
@s_shuffle_v2i64_v8i64__6_14() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s28 ; GFX942-NEXT: s_mov_b32 s15, s29 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -28440,14 +28469,16 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s28 ; GFX942-NEXT: s_mov_b32 s11, s29 @@ -28455,7 +28486,6 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: 
s_mov_b64 exec, s[0:1] @@ -29056,22 +29086,22 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s30 ; GFX900-NEXT: s_mov_b32 s15, s31 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -29085,22 +29115,22 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s30 ; GFX90A-NEXT: s_mov_b32 s15, s31 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: 
s_mov_b64 exec, s[4:5] @@ -29138,10 +29168,10 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -29149,11 +29179,11 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX900-NEXT: s_mov_b32 s29, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -29167,10 +29197,10 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -29178,11 +29208,11 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX90A-NEXT: s_mov_b32 s29, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -29228,10 +29258,10 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -29269,10 +29299,10 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -29304,22 +29334,23 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s30 ; GFX942-NEXT: s_mov_b32 s15, s31 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] 
@@ -29422,6 +29453,7 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -29433,11 +29465,11 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX942-NEXT: s_mov_b32 s29, s15 ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir index 1ffef8e60d90d..9ebf4f57ed7d3 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir @@ -24,10 +24,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; 
SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 @@ -89,10 +90,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3 @@ -152,10 +154,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 
1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir index 2de7d86223eb2..2f769d94f174d 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir @@ -2,9 +2,14 @@ # CHECK-LABEL: name: empty_entry_block # CHECK: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers + # CHECK: SI_RESTORE_S32_FROM_VGPR # CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR # CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 3c3a2f11fc96a..98048e7ace538 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -232,18 +232,18 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 
s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_byval_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_byval_i32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -383,13 +383,14 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 @@ -422,11 +423,10 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0 ; GCN-NEXT: v_mov_b32_e32 v29, 0 ; GCN-NEXT: v_mov_b32_e32 v30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -452,14 +452,14 @@ define fastcc i32 
@sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v42, s4, 2 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v42, s30, 0 +; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v42, s30, 0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: v_mov_b32_e32 v40, v1 ; GCN-NEXT: v_mov_b32_e32 v41, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -469,11 +469,11 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN-NEXT: v_mov_b32_e32 v1, v40 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: v_readlane_b32 s30, v42, 0 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 ; GCN-NEXT: v_readlane_b32 s31, v42, 1 -; GCN-NEXT: v_readlane_b32 s30, v42, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s6, v42, 2 ; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 @@ -603,23 +603,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; FIJI-NEXT: s_mov_b64 exec, s[18:19] ; FIJI-NEXT: v_writelane_b32 v40, s16, 18 -; FIJI-NEXT: v_writelane_b32 v40, s30, 0 -; FIJI-NEXT: 
v_writelane_b32 v40, s31, 1 -; FIJI-NEXT: v_writelane_b32 v40, s34, 2 -; FIJI-NEXT: v_writelane_b32 v40, s35, 3 -; FIJI-NEXT: v_writelane_b32 v40, s36, 4 -; FIJI-NEXT: v_writelane_b32 v40, s37, 5 -; FIJI-NEXT: v_writelane_b32 v40, s38, 6 -; FIJI-NEXT: v_writelane_b32 v40, s39, 7 -; FIJI-NEXT: v_writelane_b32 v40, s48, 8 -; FIJI-NEXT: v_writelane_b32 v40, s49, 9 -; FIJI-NEXT: v_writelane_b32 v40, s50, 10 -; FIJI-NEXT: v_writelane_b32 v40, s51, 11 -; FIJI-NEXT: v_writelane_b32 v40, s52, 12 -; FIJI-NEXT: v_writelane_b32 v40, s53, 13 -; FIJI-NEXT: v_writelane_b32 v40, s54, 14 -; FIJI-NEXT: v_writelane_b32 v40, s55, 15 -; FIJI-NEXT: v_writelane_b32 v40, s64, 16 +; FIJI-NEXT: s_addk_i32 s32, 0x400 +; FIJI-NEXT: v_writelane_b32 v40, s34, 0 +; FIJI-NEXT: v_writelane_b32 v40, s35, 1 +; FIJI-NEXT: v_writelane_b32 v40, s36, 2 +; FIJI-NEXT: v_writelane_b32 v40, s37, 3 +; FIJI-NEXT: v_writelane_b32 v40, s38, 4 +; FIJI-NEXT: v_writelane_b32 v40, s39, 5 +; FIJI-NEXT: v_writelane_b32 v40, s48, 6 +; FIJI-NEXT: v_writelane_b32 v40, s49, 7 +; FIJI-NEXT: v_writelane_b32 v40, s50, 8 +; FIJI-NEXT: v_writelane_b32 v40, s51, 9 +; FIJI-NEXT: v_writelane_b32 v40, s52, 10 +; FIJI-NEXT: v_writelane_b32 v40, s53, 11 +; FIJI-NEXT: v_writelane_b32 v40, s54, 12 +; FIJI-NEXT: v_writelane_b32 v40, s55, 13 +; FIJI-NEXT: v_writelane_b32 v40, s64, 14 +; FIJI-NEXT: v_writelane_b32 v40, s65, 15 +; FIJI-NEXT: v_writelane_b32 v40, s30, 16 +; FIJI-NEXT: v_writelane_b32 v40, s31, 17 ; FIJI-NEXT: s_mov_b32 s50, s15 ; FIJI-NEXT: s_mov_b32 s51, s14 ; FIJI-NEXT: s_mov_b32 s52, s13 @@ -630,8 +632,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5] ; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4 ; FIJI-NEXT: s_mov_b64 s[54:55], exec -; FIJI-NEXT: s_addk_i32 s32, 0x400 -; FIJI-NEXT: v_writelane_b32 v40, s65, 17 ; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; FIJI-NEXT: v_readfirstlane_b32 s16, v0 ; FIJI-NEXT: v_readfirstlane_b32 s17, 
v1 @@ -657,25 +657,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_cbranch_execnz .LBB18_1 ; FIJI-NEXT: ; %bb.2: ; FIJI-NEXT: s_mov_b64 exec, s[54:55] +; FIJI-NEXT: v_readlane_b32 s30, v40, 16 ; FIJI-NEXT: v_mov_b32_e32 v0, v4 -; FIJI-NEXT: v_readlane_b32 s65, v40, 17 -; FIJI-NEXT: v_readlane_b32 s64, v40, 16 -; FIJI-NEXT: v_readlane_b32 s55, v40, 15 -; FIJI-NEXT: v_readlane_b32 s54, v40, 14 -; FIJI-NEXT: v_readlane_b32 s53, v40, 13 -; FIJI-NEXT: v_readlane_b32 s52, v40, 12 -; FIJI-NEXT: v_readlane_b32 s51, v40, 11 -; FIJI-NEXT: v_readlane_b32 s50, v40, 10 -; FIJI-NEXT: v_readlane_b32 s49, v40, 9 -; FIJI-NEXT: v_readlane_b32 s48, v40, 8 -; FIJI-NEXT: v_readlane_b32 s39, v40, 7 -; FIJI-NEXT: v_readlane_b32 s38, v40, 6 -; FIJI-NEXT: v_readlane_b32 s37, v40, 5 -; FIJI-NEXT: v_readlane_b32 s36, v40, 4 -; FIJI-NEXT: v_readlane_b32 s35, v40, 3 -; FIJI-NEXT: v_readlane_b32 s34, v40, 2 -; FIJI-NEXT: v_readlane_b32 s31, v40, 1 -; FIJI-NEXT: v_readlane_b32 s30, v40, 0 +; FIJI-NEXT: v_readlane_b32 s31, v40, 17 +; FIJI-NEXT: v_readlane_b32 s65, v40, 15 +; FIJI-NEXT: v_readlane_b32 s64, v40, 14 +; FIJI-NEXT: v_readlane_b32 s55, v40, 13 +; FIJI-NEXT: v_readlane_b32 s54, v40, 12 +; FIJI-NEXT: v_readlane_b32 s53, v40, 11 +; FIJI-NEXT: v_readlane_b32 s52, v40, 10 +; FIJI-NEXT: v_readlane_b32 s51, v40, 9 +; FIJI-NEXT: v_readlane_b32 s50, v40, 8 +; FIJI-NEXT: v_readlane_b32 s49, v40, 7 +; FIJI-NEXT: v_readlane_b32 s48, v40, 6 +; FIJI-NEXT: v_readlane_b32 s39, v40, 5 +; FIJI-NEXT: v_readlane_b32 s38, v40, 4 +; FIJI-NEXT: v_readlane_b32 s37, v40, 3 +; FIJI-NEXT: v_readlane_b32 s36, v40, 2 +; FIJI-NEXT: v_readlane_b32 s35, v40, 1 +; FIJI-NEXT: v_readlane_b32 s34, v40, 0 ; FIJI-NEXT: s_mov_b32 s32, s33 ; FIJI-NEXT: v_readlane_b32 s4, v40, 18 ; FIJI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -694,23 +694,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: buffer_store_dword v40, off, s[0:3], 
s33 ; 4-byte Folded Spill ; HAWAII-NEXT: s_mov_b64 exec, s[18:19] ; HAWAII-NEXT: v_writelane_b32 v40, s16, 18 -; HAWAII-NEXT: v_writelane_b32 v40, s30, 0 -; HAWAII-NEXT: v_writelane_b32 v40, s31, 1 -; HAWAII-NEXT: v_writelane_b32 v40, s34, 2 -; HAWAII-NEXT: v_writelane_b32 v40, s35, 3 -; HAWAII-NEXT: v_writelane_b32 v40, s36, 4 -; HAWAII-NEXT: v_writelane_b32 v40, s37, 5 -; HAWAII-NEXT: v_writelane_b32 v40, s38, 6 -; HAWAII-NEXT: v_writelane_b32 v40, s39, 7 -; HAWAII-NEXT: v_writelane_b32 v40, s48, 8 -; HAWAII-NEXT: v_writelane_b32 v40, s49, 9 -; HAWAII-NEXT: v_writelane_b32 v40, s50, 10 -; HAWAII-NEXT: v_writelane_b32 v40, s51, 11 -; HAWAII-NEXT: v_writelane_b32 v40, s52, 12 -; HAWAII-NEXT: v_writelane_b32 v40, s53, 13 -; HAWAII-NEXT: v_writelane_b32 v40, s54, 14 -; HAWAII-NEXT: v_writelane_b32 v40, s55, 15 -; HAWAII-NEXT: v_writelane_b32 v40, s64, 16 +; HAWAII-NEXT: s_addk_i32 s32, 0x400 +; HAWAII-NEXT: v_writelane_b32 v40, s34, 0 +; HAWAII-NEXT: v_writelane_b32 v40, s35, 1 +; HAWAII-NEXT: v_writelane_b32 v40, s36, 2 +; HAWAII-NEXT: v_writelane_b32 v40, s37, 3 +; HAWAII-NEXT: v_writelane_b32 v40, s38, 4 +; HAWAII-NEXT: v_writelane_b32 v40, s39, 5 +; HAWAII-NEXT: v_writelane_b32 v40, s48, 6 +; HAWAII-NEXT: v_writelane_b32 v40, s49, 7 +; HAWAII-NEXT: v_writelane_b32 v40, s50, 8 +; HAWAII-NEXT: v_writelane_b32 v40, s51, 9 +; HAWAII-NEXT: v_writelane_b32 v40, s52, 10 +; HAWAII-NEXT: v_writelane_b32 v40, s53, 11 +; HAWAII-NEXT: v_writelane_b32 v40, s54, 12 +; HAWAII-NEXT: v_writelane_b32 v40, s55, 13 +; HAWAII-NEXT: v_writelane_b32 v40, s64, 14 +; HAWAII-NEXT: v_writelane_b32 v40, s65, 15 +; HAWAII-NEXT: v_writelane_b32 v40, s30, 16 +; HAWAII-NEXT: v_writelane_b32 v40, s31, 17 ; HAWAII-NEXT: s_mov_b32 s50, s15 ; HAWAII-NEXT: s_mov_b32 s51, s14 ; HAWAII-NEXT: s_mov_b32 s52, s13 @@ -721,8 +723,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5] ; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; 
HAWAII-NEXT: s_mov_b64 s[54:55], exec -; HAWAII-NEXT: s_addk_i32 s32, 0x400 -; HAWAII-NEXT: v_writelane_b32 v40, s65, 17 ; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; HAWAII-NEXT: v_readfirstlane_b32 s16, v0 ; HAWAII-NEXT: v_readfirstlane_b32 s17, v1 @@ -748,25 +748,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_cbranch_execnz .LBB18_1 ; HAWAII-NEXT: ; %bb.2: ; HAWAII-NEXT: s_mov_b64 exec, s[54:55] +; HAWAII-NEXT: v_readlane_b32 s30, v40, 16 ; HAWAII-NEXT: v_mov_b32_e32 v0, v4 -; HAWAII-NEXT: v_readlane_b32 s65, v40, 17 -; HAWAII-NEXT: v_readlane_b32 s64, v40, 16 -; HAWAII-NEXT: v_readlane_b32 s55, v40, 15 -; HAWAII-NEXT: v_readlane_b32 s54, v40, 14 -; HAWAII-NEXT: v_readlane_b32 s53, v40, 13 -; HAWAII-NEXT: v_readlane_b32 s52, v40, 12 -; HAWAII-NEXT: v_readlane_b32 s51, v40, 11 -; HAWAII-NEXT: v_readlane_b32 s50, v40, 10 -; HAWAII-NEXT: v_readlane_b32 s49, v40, 9 -; HAWAII-NEXT: v_readlane_b32 s48, v40, 8 -; HAWAII-NEXT: v_readlane_b32 s39, v40, 7 -; HAWAII-NEXT: v_readlane_b32 s38, v40, 6 -; HAWAII-NEXT: v_readlane_b32 s37, v40, 5 -; HAWAII-NEXT: v_readlane_b32 s36, v40, 4 -; HAWAII-NEXT: v_readlane_b32 s35, v40, 3 -; HAWAII-NEXT: v_readlane_b32 s34, v40, 2 -; HAWAII-NEXT: v_readlane_b32 s31, v40, 1 -; HAWAII-NEXT: v_readlane_b32 s30, v40, 0 +; HAWAII-NEXT: v_readlane_b32 s31, v40, 17 +; HAWAII-NEXT: v_readlane_b32 s65, v40, 15 +; HAWAII-NEXT: v_readlane_b32 s64, v40, 14 +; HAWAII-NEXT: v_readlane_b32 s55, v40, 13 +; HAWAII-NEXT: v_readlane_b32 s54, v40, 12 +; HAWAII-NEXT: v_readlane_b32 s53, v40, 11 +; HAWAII-NEXT: v_readlane_b32 s52, v40, 10 +; HAWAII-NEXT: v_readlane_b32 s51, v40, 9 +; HAWAII-NEXT: v_readlane_b32 s50, v40, 8 +; HAWAII-NEXT: v_readlane_b32 s49, v40, 7 +; HAWAII-NEXT: v_readlane_b32 s48, v40, 6 +; HAWAII-NEXT: v_readlane_b32 s39, v40, 5 +; HAWAII-NEXT: v_readlane_b32 s38, v40, 4 +; HAWAII-NEXT: v_readlane_b32 s37, v40, 3 +; HAWAII-NEXT: v_readlane_b32 s36, v40, 2 +; 
HAWAII-NEXT: v_readlane_b32 s35, v40, 1 +; HAWAII-NEXT: v_readlane_b32 s34, v40, 0 ; HAWAII-NEXT: s_mov_b32 s32, s33 ; HAWAII-NEXT: v_readlane_b32 s4, v40, 18 ; HAWAII-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -785,23 +785,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s16, 18 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 -; GFX9-NEXT: v_writelane_b32 v40, s64, 16 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s64, 14 +; GFX9-NEXT: v_writelane_b32 v40, s65, 15 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: 
s_mov_b32 s50, s15 ; GFX9-NEXT: s_mov_b32 s51, s14 ; GFX9-NEXT: s_mov_b32 s52, s13 @@ -812,8 +814,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5] ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 ; GFX9-NEXT: s_mov_b64 s[54:55], exec -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s65, 17 ; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_readfirstlane_b32 s16, v0 ; GFX9-NEXT: v_readfirstlane_b32 s17, v1 @@ -839,25 +839,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_cbranch_execnz .LBB18_1 ; GFX9-NEXT: ; %bb.2: ; GFX9-NEXT: s_mov_b64 exec, s[54:55] +; GFX9-NEXT: v_readlane_b32 s30, v40, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-NEXT: v_readlane_b32 s65, v40, 17 -; GFX9-NEXT: v_readlane_b32 s64, v40, 16 -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: v_readlane_b32 s65, v40, 15 +; GFX9-NEXT: v_readlane_b32 s64, v40, 14 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 
6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 18 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir index 24c631ce5e15f..7b3402494f39f 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir @@ -16,10 +16,15 @@ body: | ; CHECK: liveins: $sgpr50, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $vgpr63, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr50, $vgpr63, 0, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr52, 1, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr52, $vgpr63, 1, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr53, 2, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr53, $vgpr63, 2, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr54, 3, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr54, $vgpr63, 3, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr55, 4, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr55, $vgpr63, 4, 32 ; CHECK-NEXT: S_NOP 0, implicit $sgpr50 ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit $sgpr52 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir index 85a615c3d8ae8..866ce8a0c0293 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir +++ 
b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir @@ -13,6 +13,7 @@ body: | ; CHECK: liveins: $sgpr50, $vgpr63 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr50, $vgpr63, 0, 32 ; CHECK-NEXT: S_NOP 0, implicit $sgpr50 ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0 S_NOP 0, implicit $sgpr50 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir index fa3fd3bc6da5b..b0be5676e26a2 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -56,21 +56,37 @@ body: | ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr64, $vgpr63, 0, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr65, $vgpr63, 1, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr66, $vgpr63, 2, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr67, $vgpr63, 3, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr68, $vgpr63, 4, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr63 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr69, $vgpr63, 5, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr70, $vgpr63, 6, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr71, $vgpr63, 7, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr80, 8, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr80, $vgpr63, 8, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr81, 9, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr81, $vgpr63, 9, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr82, 10, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr82, $vgpr63, 10, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr83, 11, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr83, $vgpr63, 11, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr84, 12, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr84, $vgpr63, 12, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr85, 13, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr85, $vgpr63, 13, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr86, 14, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr86, $vgpr63, 14, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr87, 15, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr87, $vgpr63, 15, 32 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll index bfadfd860edf6..94e5f936a35fd 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll +++ 
b/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll @@ -14,15 +14,15 @@ define i32 @non_entry_func(i32 %x) { ; CHECK-NEXT: scratch_store_b32 off, v2, s32 offset:100 ; 4-byte Folded Spill ; CHECK-NEXT: s_wait_alu 0xfffe ; CHECK-NEXT: s_mov_b32 exec_lo, s0 -; CHECK-NEXT: v_writelane_b32 v2, s48, 0 ; CHECK-NEXT: s_mov_b32 m0, 0x110003 -; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Spill ; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4 ; CHECK-NEXT: s_mov_b32 m0, 1 -; CHECK-NEXT: v_writelane_b32 v2, s49, 1 ; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Spill ; CHECK-NEXT: scratch_store_block off, v[120:151], s32 +; CHECK-NEXT: v_writelane_b32 v2, s48, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_writelane_b32 v2, s49, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_nop ; CHECK-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll index e962d1bad9779..1184d1a94c3dc 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll @@ -142,8 +142,8 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { ; CHECK-NEXT: v_writelane_b32 v1, s99, 32 ; CHECK-NEXT: v_writelane_b32 v1, s100, 33 ; CHECK-NEXT: v_writelane_b32 v1, s101, 34 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: v_writelane_b32 v1, s102, 35 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index cf827945fb5f7..d57a9ca42efa5 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -293,26 +293,26 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; 
GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: v_writelane_b32 v40, s34, 3 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_add_i32 s32, s32, 0x30000 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_mov_b32_e32 v32, 0 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:1024 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s34 ; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s34 offset:4 -; GCN-NEXT: s_add_i32 s32, s32, 0x30000 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: v_readlane_b32 s34, v40, 3 @@ -453,7 +453,7 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: v_writelane_b32 v39, s4, 32 ; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 ; GCN-NEXT: v_writelane_b32 v39, s48, 1 ; GCN-NEXT: v_writelane_b32 v39, s49, 2 @@ -485,8 +485,8 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: v_writelane_b32 v39, s99, 28 ; GCN-NEXT: v_writelane_b32 v39, s100, 29 ; GCN-NEXT: v_writelane_b32 v39, s101, 30 -; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: v_writelane_b32 v39, s102, 
31 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -576,7 +576,7 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: v_writelane_b32 v39, s4, 32 ; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: s_add_i32 s32, s32, 0x46000 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 ; GCN-NEXT: v_writelane_b32 v39, s48, 1 ; GCN-NEXT: v_writelane_b32 v39, s49, 2 @@ -608,9 +608,9 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: v_writelane_b32 v39, s99, 28 ; GCN-NEXT: v_writelane_b32 v39, s100, 29 ; GCN-NEXT: v_writelane_b32 v39, s101, 30 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1080 -; GCN-NEXT: s_add_i32 s32, s32, 0x46000 ; GCN-NEXT: v_writelane_b32 v39, s102, 31 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1080 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index d2394bab82c77..70bcb99e05777 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -1270,24 +1270,24 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; WAVE32-OPT-NEXT: s_mov_b32 exec_lo, s16 ; WAVE32-OPT-NEXT: v_writelane_b32 v32, s30, 0 +; WAVE32-OPT-NEXT: s_addk_i32 s32, 0x1200 +; WAVE32-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, 17 -; WAVE32-OPT-NEXT: s_addk_i32 s32, 0x1200 -; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE32-OPT-NEXT: 
s_mov_b32 s18, s32 +; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE32-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo -; WAVE32-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-OPT-NEXT: s_lshr_b32 s19, s18, 5 ; WAVE32-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE32-OPT-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; WAVE32-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE32-OPT-NEXT: s_mov_b32 s32, s18 +; WAVE32-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE32-OPT-NEXT: ;;#ASMSTART ; WAVE32-OPT-NEXT: ; use s19 ; WAVE32-OPT-NEXT: ;;#ASMEND ; WAVE32-OPT-NEXT: v_readlane_b32 s31, v32, 1 -; WAVE32-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE32-OPT-NEXT: s_mov_b32 s32, s33 ; WAVE32-OPT-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1305,24 +1305,24 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; WAVE64-OPT-NEXT: s_mov_b64 exec, s[16:17] ; WAVE64-OPT-NEXT: v_writelane_b32 v32, s30, 0 +; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400 +; WAVE64-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, 17 -; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400 -; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE64-OPT-NEXT: s_mov_b32 s18, s32 +; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE64-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo -; WAVE64-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-OPT-NEXT: s_lshr_b32 s19, s18, 6 ; WAVE64-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE64-OPT-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE64-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; WAVE64-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE64-OPT-NEXT: s_mov_b32 s32, s18 +; 
WAVE64-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE64-OPT-NEXT: ;;#ASMSTART ; WAVE64-OPT-NEXT: ; use s19 ; WAVE64-OPT-NEXT: ;;#ASMEND ; WAVE64-OPT-NEXT: v_readlane_b32 s31, v32, 1 -; WAVE64-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE64-OPT-NEXT: s_mov_b32 s32, s33 ; WAVE64-OPT-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1431,8 +1431,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: ; use s5 ; WAVE32-O0-NEXT: ;;#ASMEND ; WAVE32-O0-NEXT: s_mov_b32 s32, s4 -; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE32-O0-NEXT: v_readlane_b32 s30, v32, 0 +; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE32-O0-NEXT: s_mov_b32 s32, s33 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1542,8 +1542,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: ; use s5 ; WAVE64-O0-NEXT: ;;#ASMEND ; WAVE64-O0-NEXT: s_mov_b32 s32, s4 -; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE64-O0-NEXT: v_readlane_b32 s30, v32, 0 +; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE64-O0-NEXT: s_mov_b32 s32, s33 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1653,8 +1653,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: ; use s5 ; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4 -; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0 +; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s33 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload diff --git 
a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll index 6be2c490e3ea8..249d2dd85243b 100644 --- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll +++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll @@ -185,17 +185,17 @@ define void @outgoing_f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 -; GFX7-NEXT: flat_load_ushort v0, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, f16_user@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, f16_user@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: flat_load_ushort v0, v[0:1] +; GFX7-NEXT: s_mov_b32 s17, f16_user@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, f16_user@abs32@lo ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -219,19 +219,19 @@ define void @outgoing_v2f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 -; GFX7-NEXT: flat_load_dword v1, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v2f16_user@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v2f16_user@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: flat_load_dword v1, v[0:1] +; GFX7-NEXT: s_mov_b32 s17, v2f16_user@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v2f16_user@abs32@lo ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: 
v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -255,19 +255,19 @@ define void @outgoing_f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -297,20 +297,20 @@ define void @outgoing_v2f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v2f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v2f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 
4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v2f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v2f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -345,13 +345,13 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v4f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v4f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v4f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v4f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -381,8 +381,8 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 { ; GFX7-NEXT: flat_store_dword v[40:41], v4 ; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v42, 2 ; GFX7-NEXT: 
s_or_saveexec_b64 s[6:7], -1 @@ -406,13 +406,13 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -468,8 +468,8 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 { ; GFX7-NEXT: flat_store_dword v[40:41], v8 ; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v42, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -494,10 +494,10 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7-NEXT: 
v_cvt_f16_f32_e32 v7, v7 @@ -518,6 +518,7 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_readlane_b32 s30, v40, 0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 @@ -527,7 +528,6 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: v_readlane_b32 s31, v40, 1 -; GFX7-NEXT: v_readlane_b32 s30, v40, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll index eecc9f22db415..42dc23a55a6dc 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll @@ -11,18 +11,19 @@ define void @test_load_zext() { ; CHECK-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_writelane_b32 v40, s0, 2 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: s_add_u32 s0, s0, has_spgr_args@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s1, s1, has_spgr_args@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_mov_b32 s0, DescriptorBuffer@abs32@lo -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[2:3] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-NEXT: 
s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll index 153ea2957dd75..dab830f8fb286 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll @@ -18,16 +18,16 @@ define void @tail_call_i32_inreg_divergent(i32 %vgpr) { ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, void_func_i32_inreg@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, void_func_i32_inreg@rel32@hi+12 ; CHECK-NEXT: ; illegal copy v0 to s0 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -52,18 +52,18 @@ define void @indirect_tail_call_i32_inreg_divergent(i32 %vgpr) { ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, constant@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, constant@rel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: ; illegal copy v0 to s0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; 
CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index dbfb054c7d164..4fae53f06f4f2 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -14,13 +14,13 @@ define internal fastcc void @widget() { ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[18:19] ; GFX90A-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_getpc_b64 s[16:17] ; GFX90A-NEXT: s_add_u32 s16, s16, wobble@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s17, s17, wobble@gotpcrel32@hi+12 ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 -; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] bb: diff --git a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll index f137f429ebe26..c871293de7436 100644 --- a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll @@ -648,27 +648,26 @@ define i32 @s_in_multiuse_A(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_mov_b32 exec_lo, s16 ; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s30, 2 +; GCN-NEXT: v_writelane_b32 v40, 
s31, 3 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 ; GCN-NEXT: s_xor_b32 s0, s0, s1 ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: s_mov_b32 s34, s1 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: s_and_b32 s35, s0, s3 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-NEXT: s_mov_b32 s34, s1 ; GCN-NEXT: v_mov_b32_e32 v0, s35 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_xor_b32 s0, s35, s34 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s30, v40, 2 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 3 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s0, v40, 4 ; GCN-NEXT: s_or_saveexec_b32 s1, -1 @@ -695,27 +694,26 @@ define i32 @s_in_multiuse_B(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_mov_b32 exec_lo, s16 ; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s30, 2 +; GCN-NEXT: v_writelane_b32 v40, s31, 3 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GCN-NEXT: s_xor_b32 s0, s0, s1 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: s_mov_b32 s34, s1 -; GCN-NEXT: 
v_writelane_b32 v40, s35, 3 ; GCN-NEXT: s_and_b32 s35, s0, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_xor_b32 s0, s35, s34 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s30, v40, 2 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 3 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s0, v40, 4 ; GCN-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll b/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll index a81d9a458e23a..a82453ee23ee9 100644 --- a/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll @@ -8,10 +8,6 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg, ; CHECK-LABEL: eliminate_spill_after_mfma_rewrite: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 -; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -60,6 +56,11 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg, ; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: v_accvgpr_write_b32 
a3, v5 +; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 +; CHECK-NEXT: s_nop 1 ; CHECK-NEXT: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[0:3] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def v[32:63], v[0:31] @@ -212,10 +213,6 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar ; CHECK-LABEL: eliminate_spill_after_mfma_rewrite_x2: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 -; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -264,6 +261,11 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar ; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 +; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 +; CHECK-NEXT: s_nop 1 ; CHECK-NEXT: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[0:3] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def v[32:63], v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 25e8581fb6cdd..639dcdcbf1c2a 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -14,22 +14,22 @@ define hidden void @widget() { ; GCN-NEXT: v_writelane_b32 v41, s16, 16 ; 
GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v41, s30, 0 -; GCN-NEXT: v_writelane_b32 v41, s31, 1 -; GCN-NEXT: v_writelane_b32 v41, s34, 2 -; GCN-NEXT: v_writelane_b32 v41, s35, 3 -; GCN-NEXT: v_writelane_b32 v41, s36, 4 -; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s48, 8 -; GCN-NEXT: v_writelane_b32 v41, s49, 9 -; GCN-NEXT: v_writelane_b32 v41, s50, 10 -; GCN-NEXT: v_writelane_b32 v41, s51, 11 -; GCN-NEXT: v_writelane_b32 v41, s52, 12 -; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s54, 14 -; GCN-NEXT: v_writelane_b32 v41, s55, 15 +; GCN-NEXT: v_writelane_b32 v41, s34, 0 +; GCN-NEXT: v_writelane_b32 v41, s35, 1 +; GCN-NEXT: v_writelane_b32 v41, s36, 2 +; GCN-NEXT: v_writelane_b32 v41, s37, 3 +; GCN-NEXT: v_writelane_b32 v41, s38, 4 +; GCN-NEXT: v_writelane_b32 v41, s39, 5 +; GCN-NEXT: v_writelane_b32 v41, s48, 6 +; GCN-NEXT: v_writelane_b32 v41, s49, 7 +; GCN-NEXT: v_writelane_b32 v41, s50, 8 +; GCN-NEXT: v_writelane_b32 v41, s51, 9 +; GCN-NEXT: v_writelane_b32 v41, s52, 10 +; GCN-NEXT: v_writelane_b32 v41, s53, 11 +; GCN-NEXT: v_writelane_b32 v41, s54, 12 +; GCN-NEXT: v_writelane_b32 v41, s55, 13 +; GCN-NEXT: v_writelane_b32 v41, s30, 14 +; GCN-NEXT: v_writelane_b32 v41, s31, 15 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] @@ -93,22 +93,22 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s55, v41, 15 -; GCN-NEXT: v_readlane_b32 s54, v41, 14 -; GCN-NEXT: v_readlane_b32 s53, v41, 13 -; GCN-NEXT: v_readlane_b32 s52, v41, 12 -; GCN-NEXT: v_readlane_b32 s51, v41, 11 -; GCN-NEXT: v_readlane_b32 s50, v41, 10 -; GCN-NEXT: v_readlane_b32 
s49, v41, 9 -; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 -; GCN-NEXT: v_readlane_b32 s37, v41, 5 -; GCN-NEXT: v_readlane_b32 s36, v41, 4 -; GCN-NEXT: v_readlane_b32 s35, v41, 3 -; GCN-NEXT: v_readlane_b32 s34, v41, 2 -; GCN-NEXT: v_readlane_b32 s31, v41, 1 -; GCN-NEXT: v_readlane_b32 s30, v41, 0 +; GCN-NEXT: v_readlane_b32 s30, v41, 14 +; GCN-NEXT: v_readlane_b32 s31, v41, 15 +; GCN-NEXT: v_readlane_b32 s55, v41, 13 +; GCN-NEXT: v_readlane_b32 s54, v41, 12 +; GCN-NEXT: v_readlane_b32 s53, v41, 11 +; GCN-NEXT: v_readlane_b32 s52, v41, 10 +; GCN-NEXT: v_readlane_b32 s51, v41, 9 +; GCN-NEXT: v_readlane_b32 s50, v41, 8 +; GCN-NEXT: v_readlane_b32 s49, v41, 7 +; GCN-NEXT: v_readlane_b32 s48, v41, 6 +; GCN-NEXT: v_readlane_b32 s39, v41, 5 +; GCN-NEXT: v_readlane_b32 s38, v41, 4 +; GCN-NEXT: v_readlane_b32 s37, v41, 3 +; GCN-NEXT: v_readlane_b32 s36, v41, 2 +; GCN-NEXT: v_readlane_b32 s35, v41, 1 +; GCN-NEXT: v_readlane_b32 s34, v41, 0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v41, 16 @@ -266,32 +266,32 @@ define hidden void @blam() { ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v45, s30, 0 -; GCN-NEXT: v_writelane_b32 v45, s31, 1 -; GCN-NEXT: v_writelane_b32 v45, s34, 2 -; GCN-NEXT: v_writelane_b32 v45, s35, 3 -; GCN-NEXT: v_writelane_b32 v45, s36, 4 -; GCN-NEXT: v_writelane_b32 v45, s37, 5 -; GCN-NEXT: v_writelane_b32 v45, s38, 6 -; GCN-NEXT: v_writelane_b32 v45, s39, 7 -; GCN-NEXT: v_writelane_b32 v45, s48, 8 -; GCN-NEXT: v_writelane_b32 v45, s49, 9 -; GCN-NEXT: v_writelane_b32 v45, s50, 10 -; GCN-NEXT: v_writelane_b32 v45, s51, 11 -; GCN-NEXT: v_writelane_b32 v45, s52, 12 -; 
GCN-NEXT: v_writelane_b32 v45, s53, 13 -; GCN-NEXT: v_writelane_b32 v45, s54, 14 -; GCN-NEXT: v_writelane_b32 v45, s55, 15 -; GCN-NEXT: v_writelane_b32 v45, s64, 16 -; GCN-NEXT: v_writelane_b32 v45, s65, 17 -; GCN-NEXT: v_writelane_b32 v45, s66, 18 -; GCN-NEXT: v_writelane_b32 v45, s67, 19 -; GCN-NEXT: v_writelane_b32 v45, s68, 20 -; GCN-NEXT: v_writelane_b32 v45, s69, 21 -; GCN-NEXT: v_writelane_b32 v45, s70, 22 -; GCN-NEXT: v_writelane_b32 v45, s71, 23 -; GCN-NEXT: v_writelane_b32 v45, s80, 24 -; GCN-NEXT: v_writelane_b32 v45, s81, 25 +; GCN-NEXT: v_writelane_b32 v45, s34, 0 +; GCN-NEXT: v_writelane_b32 v45, s35, 1 +; GCN-NEXT: v_writelane_b32 v45, s36, 2 +; GCN-NEXT: v_writelane_b32 v45, s37, 3 +; GCN-NEXT: v_writelane_b32 v45, s38, 4 +; GCN-NEXT: v_writelane_b32 v45, s39, 5 +; GCN-NEXT: v_writelane_b32 v45, s48, 6 +; GCN-NEXT: v_writelane_b32 v45, s49, 7 +; GCN-NEXT: v_writelane_b32 v45, s50, 8 +; GCN-NEXT: v_writelane_b32 v45, s51, 9 +; GCN-NEXT: v_writelane_b32 v45, s52, 10 +; GCN-NEXT: v_writelane_b32 v45, s53, 11 +; GCN-NEXT: v_writelane_b32 v45, s54, 12 +; GCN-NEXT: v_writelane_b32 v45, s55, 13 +; GCN-NEXT: v_writelane_b32 v45, s64, 14 +; GCN-NEXT: v_writelane_b32 v45, s65, 15 +; GCN-NEXT: v_writelane_b32 v45, s66, 16 +; GCN-NEXT: v_writelane_b32 v45, s67, 17 +; GCN-NEXT: v_writelane_b32 v45, s68, 18 +; GCN-NEXT: v_writelane_b32 v45, s69, 19 +; GCN-NEXT: v_writelane_b32 v45, s70, 20 +; GCN-NEXT: v_writelane_b32 v45, s71, 21 +; GCN-NEXT: v_writelane_b32 v45, s80, 22 +; GCN-NEXT: v_writelane_b32 v45, s81, 23 +; GCN-NEXT: v_writelane_b32 v45, s30, 24 +; GCN-NEXT: v_writelane_b32 v45, s31, 25 ; GCN-NEXT: v_mov_b32_e32 v40, v31 ; GCN-NEXT: s_mov_b32 s54, s15 ; GCN-NEXT: s_mov_b32 s55, s14 @@ -427,32 +427,32 @@ define hidden void @blam() { ; GCN-NEXT: s_branch .LBB1_1 ; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock ; GCN-NEXT: s_or_b64 exec, exec, s[66:67] -; GCN-NEXT: v_readlane_b32 s81, v45, 25 -; GCN-NEXT: v_readlane_b32 s80, v45, 24 -; GCN-NEXT: v_readlane_b32 
s71, v45, 23 -; GCN-NEXT: v_readlane_b32 s70, v45, 22 -; GCN-NEXT: v_readlane_b32 s69, v45, 21 -; GCN-NEXT: v_readlane_b32 s68, v45, 20 -; GCN-NEXT: v_readlane_b32 s67, v45, 19 -; GCN-NEXT: v_readlane_b32 s66, v45, 18 -; GCN-NEXT: v_readlane_b32 s65, v45, 17 -; GCN-NEXT: v_readlane_b32 s64, v45, 16 -; GCN-NEXT: v_readlane_b32 s55, v45, 15 -; GCN-NEXT: v_readlane_b32 s54, v45, 14 -; GCN-NEXT: v_readlane_b32 s53, v45, 13 -; GCN-NEXT: v_readlane_b32 s52, v45, 12 -; GCN-NEXT: v_readlane_b32 s51, v45, 11 -; GCN-NEXT: v_readlane_b32 s50, v45, 10 -; GCN-NEXT: v_readlane_b32 s49, v45, 9 -; GCN-NEXT: v_readlane_b32 s48, v45, 8 -; GCN-NEXT: v_readlane_b32 s39, v45, 7 -; GCN-NEXT: v_readlane_b32 s38, v45, 6 -; GCN-NEXT: v_readlane_b32 s37, v45, 5 -; GCN-NEXT: v_readlane_b32 s36, v45, 4 -; GCN-NEXT: v_readlane_b32 s35, v45, 3 -; GCN-NEXT: v_readlane_b32 s34, v45, 2 -; GCN-NEXT: v_readlane_b32 s31, v45, 1 -; GCN-NEXT: v_readlane_b32 s30, v45, 0 +; GCN-NEXT: v_readlane_b32 s30, v45, 24 +; GCN-NEXT: v_readlane_b32 s31, v45, 25 +; GCN-NEXT: v_readlane_b32 s81, v45, 23 +; GCN-NEXT: v_readlane_b32 s80, v45, 22 +; GCN-NEXT: v_readlane_b32 s71, v45, 21 +; GCN-NEXT: v_readlane_b32 s70, v45, 20 +; GCN-NEXT: v_readlane_b32 s69, v45, 19 +; GCN-NEXT: v_readlane_b32 s68, v45, 18 +; GCN-NEXT: v_readlane_b32 s67, v45, 17 +; GCN-NEXT: v_readlane_b32 s66, v45, 16 +; GCN-NEXT: v_readlane_b32 s65, v45, 15 +; GCN-NEXT: v_readlane_b32 s64, v45, 14 +; GCN-NEXT: v_readlane_b32 s55, v45, 13 +; GCN-NEXT: v_readlane_b32 s54, v45, 12 +; GCN-NEXT: v_readlane_b32 s53, v45, 11 +; GCN-NEXT: v_readlane_b32 s52, v45, 10 +; GCN-NEXT: v_readlane_b32 s51, v45, 9 +; GCN-NEXT: v_readlane_b32 s50, v45, 8 +; GCN-NEXT: v_readlane_b32 s49, v45, 7 +; GCN-NEXT: v_readlane_b32 s48, v45, 6 +; GCN-NEXT: v_readlane_b32 s39, v45, 5 +; GCN-NEXT: v_readlane_b32 s38, v45, 4 +; GCN-NEXT: v_readlane_b32 s37, v45, 3 +; GCN-NEXT: v_readlane_b32 s36, v45, 2 +; GCN-NEXT: v_readlane_b32 s35, v45, 1 +; GCN-NEXT: v_readlane_b32 s34, v45, 
0 ; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 5beb2237466a8..6be261c2ecb5a 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -17,15 +17,18 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: v_writelane_b32 v44, s4, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v36, v16 ; GFX9-NEXT: v_mov_b32_e32 v35, v15 ; GFX9-NEXT: v_mov_b32_e32 v34, v14 ; GFX9-NEXT: v_mov_b32_e32 v33, v13 ; GFX9-NEXT: v_mov_b32_e32 v32, v12 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: ;;#ASMSTART @@ -35,13 +38,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: 
image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 -; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v44, s30, 0 -; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_mov_b32_e32 v0, v40 @@ -52,8 +52,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: v_readlane_b32 s30, v44, 0 +; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v44, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -73,15 +73,18 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 ; GFX10-NEXT: v_writelane_b32 v44, s4, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v36, v16 ; GFX10-NEXT: v_mov_b32_e32 v35, v15 ; GFX10-NEXT: v_mov_b32_e32 v34, v14 ; GFX10-NEXT: v_mov_b32_e32 v33, v13 ; GFX10-NEXT: v_mov_b32_e32 v32, v12 -; GFX10-NEXT: buffer_store_dword v40, off, 
s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: ;;#ASMSTART @@ -91,14 +94,11 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v44, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_mov_b32_e32 v0, v40 @@ -110,8 +110,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s4, v44, 2 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -131,14 +131,20 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v44, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 -; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: 
v_dual_mov_b32 v33, v13 -; GFX11-NEXT: v_mov_b32_e32 v32, v12 +; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-NEXT: v_writelane_b32 v44, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 +; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13 +; GFX11-NEXT: v_mov_b32_e32 v32, v12 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ;;#ASMSTART @@ -148,13 +154,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v44, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41 @@ -164,8 +167,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v44, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ 
-208,24 +211,24 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: v_writelane_b32 v45, s4, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v45, s30, 0 +; GFX9-NEXT: v_writelane_b32 v45, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v44, v16 ; GFX9-NEXT: v_mov_b32_e32 v43, v15 ; GFX9-NEXT: v_mov_b32_e32 v42, v14 ; GFX9-NEXT: v_mov_b32_e32 v41, v13 ; GFX9-NEXT: v_mov_b32_e32 v40, v12 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:44], s[4:11], s[4:7] dmask:0x1 -; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v45, s30, 0 -; GFX9-NEXT: v_writelane_b32 v45, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -237,8 +240,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v45, 1 ; GFX9-NEXT: v_readlane_b32 s30, v45, 0 +; GFX9-NEXT: v_readlane_b32 s31, v45, 1 ; GFX9-NEXT: 
s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v45, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -258,23 +261,23 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 ; GFX10-NEXT: v_writelane_b32 v45, s4, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v45, s30, 0 +; GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v45, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v40, v16 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v41, v15 ; GFX10-NEXT: v_mov_b32_e32 v42, v14 -; GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v43, v13 ; GFX10-NEXT: v_mov_b32_e32 v44, v12 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -288,8 +291,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 -; GFX10-NEXT: v_readlane_b32 s31, v45, 1 ; GFX10-NEXT: v_readlane_b32 s30, v45, 0 +; GFX10-NEXT: 
v_readlane_b32 s31, v45, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s4, v45, 2 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -309,22 +312,26 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:20 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v45, s0, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 +; GFX11-NEXT: v_writelane_b32 v45, s30, 0 +; GFX11-NEXT: v_writelane_b32 v45, s31, 1 ; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v45, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v40, v16 :: v_dual_mov_b32 v41, v15 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v42, v14 :: v_dual_mov_b32 v43, v13 -; GFX11-NEXT: v_writelane_b32 v45, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v44, v12 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off @@ -337,8 +344,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:12 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 -; GFX11-NEXT: v_readlane_b32 s31, v45, 1 ; GFX11-NEXT: 
v_readlane_b32 s30, v45, 0 +; GFX11-NEXT: v_readlane_b32 s31, v45, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v45, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index fe3a6c59f1728..110013258bd89 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -3077,17 +3077,17 @@ define void @callee_no_stack_with_call() #1 { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_mov_b32 exec_lo, s17 ; GFX1032-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1032-NEXT: v_writelane_b32 v40, s30, 0 ; GFX1032-NEXT: s_addk_i32 s32, 0x200 +; GFX1032-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1032-NEXT: s_getpc_b64 s[16:17] ; GFX1032-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 -; GFX1032-NEXT: v_writelane_b32 v40, s30, 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX1032-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1032-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1032-NEXT: v_readlane_b32 s30, v40, 0 +; GFX1032-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1032-NEXT: s_mov_b32 s32, s33 ; GFX1032-NEXT: v_readlane_b32 s4, v40, 2 ; GFX1032-NEXT: s_or_saveexec_b32 s5, -1 @@ -3108,17 +3108,17 @@ define void @callee_no_stack_with_call() #1 { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, s[18:19] ; GFX1064-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1064-NEXT: v_writelane_b32 v40, s30, 0 ; GFX1064-NEXT: s_addk_i32 s32, 0x400 +; GFX1064-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1064-NEXT: s_getpc_b64 s[16:17] ; GFX1064-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 -; GFX1064-NEXT: v_writelane_b32 v40, s30, 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; 
GFX1064-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1064-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1064-NEXT: v_readlane_b32 s30, v40, 0 +; GFX1064-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1064-NEXT: s_mov_b32 s32, s33 ; GFX1064-NEXT: v_readlane_b32 s4, v40, 2 ; GFX1064-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index 61a15747ae033..75e06aed64748 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -382,10 +382,10 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber CSR ; DAGISEL-NEXT: ;;#ASMEND -; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber non-CSR ; DAGISEL-NEXT: ;;#ASMEND @@ -424,10 +424,10 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber CSR ; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber non-CSR ; GISEL-NEXT: ;;#ASMEND @@ -466,10 +466,10 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; 
DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber CSR ; DAGISEL64-NEXT: ;;#ASMEND -; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber non-CSR ; DAGISEL64-NEXT: ;;#ASMEND @@ -509,10 +509,10 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber CSR ; GISEL64-NEXT: ;;#ASMEND -; GISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber non-CSR ; GISEL64-NEXT: ;;#ASMEND @@ -550,11 +550,11 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: ;;#ASMSTART ; GFX1250-DAGISEL-NEXT: ; clobber CSR ; GFX1250-DAGISEL-NEXT: ;;#ASMEND -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GFX1250-DAGISEL-NEXT: ;;#ASMSTART ; GFX1250-DAGISEL-NEXT: ; clobber non-CSR ; GFX1250-DAGISEL-NEXT: ;;#ASMEND @@ -1791,19 +1791,18 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 +; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 +; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 +; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL-NEXT: v_swap_b32 v0, v1 ; DAGISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 ; DAGISEL-NEXT: s_mov_b32 
s0, gfx_callee@abs32@lo -; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; DAGISEL-NEXT: v_readlane_b32 s30, v40, 1 +; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; DAGISEL-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL-NEXT: v_readlane_b32 s0, v40, 3 ; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -2266,19 +2265,18 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v40, s0, 3 +; GISEL-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL-NEXT: v_writelane_b32 v40, s4, 0 +; GISEL-NEXT: v_writelane_b32 v40, s30, 1 +; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 ; GISEL-NEXT: v_swap_b32 v0, v1 ; GISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL-NEXT: v_writelane_b32 v40, s30, 1 ; GISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; GISEL-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GISEL-NEXT: v_readlane_b32 s30, v40, 1 +; GISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GISEL-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL-NEXT: v_readlane_b32 s0, v40, 3 ; GISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -2741,20 +2739,19 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v40, s0, 4 +; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 +; DAGISEL64-NEXT: 
v_writelane_b32 v40, s5, 1 +; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2 +; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL64-NEXT: v_swap_b32 v0, v1 ; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2 -; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; DAGISEL64-NEXT: v_readlane_b32 s30, v40, 2 +; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; DAGISEL64-NEXT: v_readlane_b32 s5, v40, 1 ; DAGISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL64-NEXT: v_readlane_b32 s0, v40, 4 @@ -3218,20 +3215,19 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v40, s0, 4 +; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 +; GISEL64-NEXT: v_writelane_b32 v40, s5, 1 +; GISEL64-NEXT: v_writelane_b32 v40, s30, 2 +; GISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; GISEL64-NEXT: v_mov_b32_e32 v2, v0 ; GISEL64-NEXT: v_swap_b32 v0, v1 ; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL64-NEXT: v_writelane_b32 v40, s30, 2 -; GISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; GISEL64-NEXT: v_readlane_b32 s30, v40, 2 +; GISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; GISEL64-NEXT: v_readlane_b32 
s5, v40, 1 ; GISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL64-NEXT: v_readlane_b32 s0, v40, 4 @@ -5233,17 +5229,17 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 +; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1 ; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], gfx_callee@abs64 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 -; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 +; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] -; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s30, v40, 1 +; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s4, v40, 0 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s0, v40, 3 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -11155,18 +11151,18 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 +; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 +; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 +; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 
-; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 -; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; DAGISEL-NEXT: v_readlane_b32 s30, v42, 1 +; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; DAGISEL-NEXT: v_readlane_b32 s4, v42, 0 ; DAGISEL-NEXT: v_readlane_b32 s0, v42, 3 ; DAGISEL-NEXT: s_clause 0x2 @@ -11637,18 +11633,18 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v42, s0, 3 +; GISEL-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL-NEXT: v_writelane_b32 v42, s4, 0 +; GISEL-NEXT: v_writelane_b32 v42, s30, 1 +; GISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 -; GISEL-NEXT: v_writelane_b32 v42, s30, 1 -; GISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL-NEXT: flat_store_b32 v[40:41], v0 -; GISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GISEL-NEXT: v_readlane_b32 s30, v42, 1 +; GISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GISEL-NEXT: v_readlane_b32 s4, v42, 0 ; GISEL-NEXT: v_readlane_b32 s0, v42, 3 ; GISEL-NEXT: s_clause 0x2 @@ -12119,20 +12115,20 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v42, s0, 4 +; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0 +; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1 +; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2 +; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; DAGISEL64-NEXT: s_mov_b32 s1, 
callee@abs32@hi ; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 -; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1 ; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 -; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2 -; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; DAGISEL64-NEXT: v_readlane_b32 s30, v42, 2 +; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; DAGISEL64-NEXT: v_readlane_b32 s5, v42, 1 ; DAGISEL64-NEXT: v_readlane_b32 s4, v42, 0 ; DAGISEL64-NEXT: v_readlane_b32 s0, v42, 4 @@ -12604,20 +12600,20 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v42, s0, 4 +; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL64-NEXT: v_writelane_b32 v42, s4, 0 +; GISEL64-NEXT: v_writelane_b32 v42, s5, 1 +; GISEL64-NEXT: v_writelane_b32 v42, s30, 2 +; GISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo ; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL64-NEXT: v_mov_b32_e32 v40, v8 -; GISEL64-NEXT: v_writelane_b32 v42, s5, 1 ; GISEL64-NEXT: v_mov_b32_e32 v41, v9 -; GISEL64-NEXT: v_writelane_b32 v42, s30, 2 -; GISEL64-NEXT: v_writelane_b32 v42, s31, 3 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL64-NEXT: flat_store_b32 v[40:41], v0 -; GISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; GISEL64-NEXT: v_readlane_b32 s30, v42, 2 +; GISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; GISEL64-NEXT: v_readlane_b32 s5, v42, 1 ; GISEL64-NEXT: v_readlane_b32 s4, v42, 0 ; GISEL64-NEXT: v_readlane_b32 s0, v42, 4 @@ -14627,17 +14623,17 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 
%unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x2 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 +; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 -; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] ; GFX1250-DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s30, v42, 1 +; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s4, v42, 0 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s0, v42, 3 ; GFX1250-DAGISEL-NEXT: s_clause 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll index 06c451869e841..9eea46172ce81 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll @@ -22,9 +22,9 @@ define void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: v_writelane_b32 v40, s28, 2 ; GFX90A-NEXT: v_writelane_b32 v40, s29, 3 ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 -; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s20 ; GFX90A-NEXT: ;;#ASMEND @@ -41,12 +41,12 @@ define void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 ; 
GFX90A-NEXT: v_accvgpr_read_b32 v39, a32 ; GFX90A-NEXT: s_mov_b64 exec, s[28:29] +; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 ; GFX90A-NEXT: v_readlane_b32 s20, v39, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s20 ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 ; GFX90A-NEXT: s_mov_b32 s32, s33 ; GFX90A-NEXT: v_readlane_b32 s4, v40, 4 ; GFX90A-NEXT: v_readlane_b32 s28, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll index 9e9fe1809c780..b3ad8880b85a9 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll @@ -27,9 +27,9 @@ define void @test() #0 { ; GCN-NEXT: v_writelane_b32 v40, s28, 2 ; GCN-NEXT: v_writelane_b32 v40, s29, 3 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s16 ; GCN-NEXT: ;;#ASMEND @@ -49,10 +49,10 @@ define void @test() #0 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s4, v39, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-NEXT: v_readlane_b32 s28, v40, 2 @@ -111,8 +111,8 @@ define void @test() #0 { ; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 ; GCN-O0-NEXT: global_store_dword v[0:1], v2, off ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-O0-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-O0-NEXT: s_mov_b32 s32, s33 ; GCN-O0-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-O0-NEXT: 
v_readlane_b32 s28, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index 74e9ab718c3d2..f28ceb4e0d8b7 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -387,8 +387,8 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: s_mov_b32 s32, s33 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -424,9 +424,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: s_mov_b32 s32, s33 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -624,8 +624,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: s_mov_b32 s32, s33 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -685,9 +685,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr 
addrspace(8) inreg %tmp14, i64 i ; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: s_mov_b32 s32, s33 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload